Skip to main content

gen_models/
block_group.rs

1use std::{
2    collections::{HashMap, HashSet},
3    hash::Hash,
4    rc::Rc,
5};
6
7use gen_core::{
8    HashId, INDETERMINATE_CHROMOSOME_INDEX, NO_CHROMOSOME_INDEX, NodeIntervalBlock,
9    PATH_END_NODE_ID, PATH_START_NODE_ID, PRESERVE_EDIT_SITE_CHROMOSOME_INDEX, PathBlock, Strand,
10    calculate_hash, is_end_node, is_start_node, is_terminal, traits::Capnp,
11};
12use gen_graph::{
13    GenGraph, GraphNode, all_intermediate_edges, all_reachable_nodes, all_simple_paths,
14    flatten_to_interval_tree,
15};
16use intervaltree::IntervalTree;
17use rusqlite::{Row, params, types::Value as SQLValue};
18use serde::{Deserialize, Serialize};
19
20use crate::{
21    accession::{Accession, AccessionEdge, AccessionEdgeData, AccessionPath},
22    block_group_edge::{AugmentedEdgeData, BlockGroupEdge, BlockGroupEdgeData},
23    db::GraphConnection,
24    edge::{Edge, EdgeData, GroupBlock},
25    errors::{ChangeError, QueryError},
26    gen_models_capnp::block_group,
27    node::Node,
28    path::{Path, PathData},
29    path_edge::PathEdge,
30    sample::Sample,
31    traits::*,
32};
33
34#[derive(Clone, Debug, Deserialize, Eq, Hash, Serialize, PartialEq)]
35pub struct BlockGroup {
36    pub id: HashId,
37    pub collection_name: String,
38    pub sample_name: String,
39    pub name: String,
40    pub created_on: i64,
41    pub parent_block_group_id: Option<HashId>,
42    pub is_default: bool,
43}
44
45impl<'a> Capnp<'a> for BlockGroup {
46    type Builder = block_group::Builder<'a>;
47    type Reader = block_group::Reader<'a>;
48
49    fn write_capnp(&self, builder: &mut Self::Builder) {
50        builder.set_id(&self.id.0).unwrap();
51        builder.set_collection_name(&self.collection_name);
52        builder.set_sample_name(&self.sample_name);
53        builder.set_name(&self.name);
54        builder.set_created_on(self.created_on);
55        match &self.parent_block_group_id {
56            None => {
57                builder.reborrow().get_parent_block_group_id().set_none(());
58            }
59            Some(parent_block_group_id) => {
60                builder
61                    .reborrow()
62                    .get_parent_block_group_id()
63                    .set_some(&parent_block_group_id.0)
64                    .unwrap();
65            }
66        }
67        builder.set_is_default(self.is_default);
68    }
69
70    fn read_capnp(reader: Self::Reader) -> Self {
71        let id = reader
72            .get_id()
73            .unwrap()
74            .as_slice()
75            .unwrap()
76            .try_into()
77            .unwrap();
78        let collection_name = reader.get_collection_name().unwrap().to_string().unwrap();
79        let sample_name = reader.get_sample_name().unwrap().to_string().unwrap();
80        let name = reader.get_name().unwrap().to_string().unwrap();
81        let created_on = reader.get_created_on();
82        let parent_block_group_id =
83            reader
84                .get_parent_block_group_id()
85                .which()
86                .ok()
87                .and_then(|parent_block_group_id| match parent_block_group_id {
88                    block_group::parent_block_group_id::None(()) => None,
89                    block_group::parent_block_group_id::Some(parent_block_group_id) => {
90                        if let Ok(parent_block_group_id) = parent_block_group_id {
91                            if let Some(parent_block_group_id) = parent_block_group_id.as_slice() {
92                                parent_block_group_id.try_into().ok()
93                            } else {
94                                None
95                            }
96                        } else {
97                            None
98                        }
99                    }
100                });
101        let is_default = reader.get_is_default();
102
103        BlockGroup {
104            id,
105            collection_name,
106            sample_name,
107            name,
108            created_on,
109            parent_block_group_id,
110            is_default,
111        }
112    }
113}
114
115#[derive(Clone, Debug, Eq, Hash, PartialEq)]
116pub struct BlockGroupData<'a> {
117    pub collection_name: &'a str,
118    pub sample_name: &'a str,
119    pub name: String,
120}
121
122#[derive(Clone, Debug, Default)]
123pub struct NewBlockGroup<'a> {
124    pub collection_name: &'a str,
125    pub sample_name: &'a str,
126    pub name: &'a str,
127    pub parent_block_group_id: Option<&'a HashId>,
128    pub is_default: bool,
129}
130
131#[derive(Clone, Debug)]
132pub struct PathChange {
133    pub block_group_id: HashId,
134    pub path: Path,
135    pub path_accession: Option<String>,
136    pub start: i64,
137    pub end: i64,
138    pub block: PathBlock,
139    pub chromosome_index: i64,
140    pub phased: i64,
141    pub preserve_edge: bool,
142}
143
144pub struct PathCache<'a> {
145    pub cache: HashMap<PathData, Path>,
146    pub intervaltree_cache: HashMap<Path, IntervalTree<i64, NodeIntervalBlock>>,
147    pub conn: &'a GraphConnection,
148}
149
150impl<'a> PathCache<'a> {
151    pub fn new(conn: &'a GraphConnection) -> PathCache<'a> {
152        PathCache {
153            cache: HashMap::<PathData, Path>::new(),
154            intervaltree_cache: HashMap::<Path, IntervalTree<i64, NodeIntervalBlock>>::new(),
155            conn,
156        }
157    }
158
159    pub fn lookup(path_cache: &mut PathCache, block_group_id: &HashId, name: String) -> Path {
160        let path_key = PathData {
161            name: name.clone(),
162            block_group_id: *block_group_id,
163        };
164        let path_lookup = path_cache.cache.get(&path_key);
165        if let Some(path) = path_lookup {
166            path.clone()
167        } else {
168            let conn = path_cache.conn;
169            let new_path = Path::query(
170                conn,
171                "select * from paths where block_group_id = ?1 AND name = ?2",
172                params![block_group_id, name],
173            )[0]
174            .clone();
175
176            path_cache.cache.insert(path_key, new_path.clone());
177            let tree = new_path.intervaltree(conn);
178            path_cache.intervaltree_cache.insert(new_path.clone(), tree);
179            new_path
180        }
181    }
182
183    pub fn get_intervaltree<'b>(
184        path_cache: &'b mut PathCache<'_>,
185        path: &Path,
186    ) -> &'b IntervalTree<i64, NodeIntervalBlock> {
187        if !path_cache.intervaltree_cache.contains_key(path) {
188            let tree = path.intervaltree(path_cache.conn);
189            path_cache.intervaltree_cache.insert(path.clone(), tree);
190        }
191
192        path_cache.intervaltree_cache.get(path).unwrap()
193    }
194}
195
196impl BlockGroup {
197    pub fn create(conn: &GraphConnection, new_block_group: NewBlockGroup<'_>) -> BlockGroup {
198        Sample::get_or_create(conn, new_block_group.sample_name);
199        let hash = BlockGroup::get_id(
200            new_block_group.collection_name,
201            new_block_group.sample_name,
202            new_block_group.name,
203            new_block_group.parent_block_group_id,
204        );
205        let timestamp = chrono::Utc::now().timestamp_nanos_opt().unwrap();
206        let query = "INSERT INTO block_groups (
207                id,
208                collection_name,
209                sample_name,
210                name,
211                created_on,
212                parent_block_group_id,
213                is_default
214            ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7);";
215        let mut stmt = conn.prepare(query).unwrap();
216        let bg = BlockGroup {
217            id: hash,
218            collection_name: new_block_group.collection_name.to_string(),
219            sample_name: new_block_group.sample_name.to_string(),
220            name: new_block_group.name.to_string(),
221            created_on: timestamp,
222            parent_block_group_id: new_block_group.parent_block_group_id.copied(),
223            is_default: new_block_group.is_default,
224        };
225        match stmt.execute(params![
226            hash,
227            new_block_group.collection_name,
228            new_block_group.sample_name,
229            new_block_group.name,
230            timestamp,
231            new_block_group.parent_block_group_id,
232            new_block_group.is_default,
233        ]) {
234            Ok(_) => bg,
235            Err(rusqlite::Error::SqliteFailure(err, _details)) => {
236                if err.code == rusqlite::ErrorCode::ConstraintViolation {
237                    bg
238                } else {
239                    panic!("something bad happened querying the database")
240                }
241            }
242            Err(_) => {
243                panic!("something bad happened querying the database")
244            }
245        }
246    }
247
248    pub fn delete(conn: &GraphConnection, collection_name: &str, sample_name: &str, name: &str) {
249        let query = "delete from block_groups where collection_name = ?1 and sample_name = ?2 and name = ?3;";
250        conn.execute(
251            query,
252            params![
253                collection_name.to_string(),
254                sample_name.to_string(),
255                name.to_string()
256            ],
257        )
258        .unwrap();
259    }
260
261    pub fn get_by_id(conn: &GraphConnection, id: &HashId) -> BlockGroup {
262        let query = "SELECT * FROM block_groups WHERE id = ?1";
263        let mut stmt = conn.prepare(query).unwrap();
264        match stmt.query_row([id], |row| Ok(Self::process_row(row))) {
265            Ok(res) => res,
266            Err(rusqlite::Error::QueryReturnedNoRows) => panic!("No block group with id {id}"),
267            Err(_) => {
268                panic!("something bad happened querying the database")
269            }
270        }
271    }
272
273    fn copy_paths_and_accessions_into(
274        &self,
275        conn: &GraphConnection,
276        target_block_group_id: &HashId,
277    ) {
278        let existing_paths = Path::query(
279            conn,
280            "SELECT * from paths where block_group_id = ?1;",
281            params![self.id],
282        );
283
284        let mut path_map = HashMap::new();
285        for path in &existing_paths {
286            let edge_ids = PathEdge::edges_for_path(conn, &path.id)
287                .into_iter()
288                .map(|edge| edge.id)
289                .collect::<Vec<_>>();
290            let new_path = Path::create(conn, &path.name, target_block_group_id, &edge_ids);
291            path_map.insert(path.id, new_path.id);
292        }
293
294        for accession in Accession::query(
295            conn,
296            "select * from accessions where path_id IN rarray(?1);",
297            rusqlite::params!(Rc::new(
298                existing_paths
299                    .iter()
300                    .map(|path| SQLValue::from(path.id))
301                    .collect::<Vec<SQLValue>>()
302            )),
303        ) {
304            let edges = AccessionPath::query(
305                conn,
306                "Select * from accession_paths where accession_id = ?1 order by index_in_path ASC;",
307                rusqlite::params!(SQLValue::from(accession.id)),
308            );
309            let new_path_id = path_map.get(&accession.path_id).unwrap();
310            let obj = Accession::get_or_create(
311                conn,
312                &accession.name,
313                new_path_id,
314                accession.parent_accession_id.as_ref(),
315            );
316            AccessionPath::create(
317                conn,
318                &obj.id,
319                &edges.iter().map(|ap| ap.edge_id).collect::<Vec<_>>(),
320            );
321        }
322    }
323
324    pub fn get_or_create_sample_block_groups(
325        conn: &GraphConnection,
326        collection_name: &str,
327        sample_name: &str,
328        group_name: &str,
329        parent_samples: Vec<String>,
330    ) -> Result<Vec<BlockGroup>, QueryError> {
331        let existing_block_groups = BlockGroup::query(
332            conn,
333            "select * from block_groups
334             where collection_name = ?1 AND sample_name = ?2 AND name = ?3
335             order by created_on, id",
336            params![collection_name, sample_name, group_name],
337        );
338
339        if !existing_block_groups.is_empty() {
340            return Ok(existing_block_groups);
341        }
342
343        let parent_block_groups = BlockGroup::find_parent_block_groups(
344            conn,
345            collection_name,
346            sample_name,
347            group_name,
348            &parent_samples,
349        );
350
351        if parent_block_groups.is_empty() {
352            return Ok(vec![BlockGroup::create(
353                conn,
354                NewBlockGroup {
355                    collection_name,
356                    sample_name,
357                    name: group_name,
358                    ..Default::default()
359                },
360            )]);
361        }
362
363        let new_block_groups = parent_block_groups
364            .iter()
365            .map(|parent_block_group| {
366                let new_block_group = BlockGroup::create(
367                    conn,
368                    NewBlockGroup {
369                        collection_name,
370                        sample_name,
371                        name: group_name,
372                        parent_block_group_id: Some(&parent_block_group.id),
373                        ..Default::default()
374                    },
375                );
376                new_block_group.copy_contents_from(conn, parent_block_group);
377                new_block_group
378            })
379            .collect::<Vec<_>>();
380
381        Ok(new_block_groups)
382    }
383
384    pub fn find_parent_block_groups(
385        conn: &GraphConnection,
386        collection_name: &str,
387        sample_name: &str,
388        group_name: &str,
389        parent_samples: &[String],
390    ) -> Vec<BlockGroup> {
391        let parent_samples = if parent_samples.is_empty() {
392            Sample::get_parent_names(conn, sample_name)
393        } else {
394            parent_samples.to_vec()
395        };
396        if parent_samples.is_empty() {
397            return vec![];
398        }
399
400        BlockGroup::query(
401            conn,
402            "select * from block_groups
403             where collection_name = ?1 AND sample_name IN rarray(?2) AND name = ?3
404             order by sample_name, created_on, id",
405            params![
406                collection_name,
407                Rc::new(
408                    parent_samples
409                        .iter()
410                        .cloned()
411                        .map(SQLValue::from)
412                        .collect::<Vec<_>>()
413                ),
414                group_name
415            ],
416        )
417    }
418
419    fn copy_contents_from(&self, conn: &GraphConnection, source_block_group: &BlockGroup) {
420        let new_block_group_edges =
421            BlockGroupEdge::edges_for_block_group(conn, &source_block_group.id)
422                .into_iter()
423                .map(|edge| BlockGroupEdgeData {
424                    block_group_id: self.id,
425                    edge_id: edge.edge.id,
426                    chromosome_index: edge.chromosome_index,
427                    phased: edge.phased,
428                })
429                .collect::<Vec<_>>();
430        BlockGroupEdge::bulk_create(conn, &new_block_group_edges);
431        source_block_group.copy_paths_and_accessions_into(conn, &self.id);
432    }
433
434    pub fn get_id(
435        collection_name: &str,
436        sample_name: &str,
437        group_name: &str,
438        parent_block_group_id: Option<&HashId>,
439    ) -> HashId {
440        let lineage_key = parent_block_group_id
441            .map(ToString::to_string)
442            .unwrap_or_default();
443        HashId(calculate_hash(&format!(
444            "{collection_name}:{sample_name}:{group_name}:{lineage_key}"
445        )))
446    }
447
448    pub fn get_graph(conn: &GraphConnection, block_group_id: &HashId) -> GenGraph {
449        let edges = BlockGroupEdge::edges_for_block_group(conn, block_group_id);
450        let blocks = Edge::blocks_from_edges(conn, &edges);
451        let (graph, _) = Edge::build_graph(&edges, &blocks);
452        graph
453    }
454
455    pub fn prune_graph(graph: &mut GenGraph) {
456        // Prunes a graph by removing edges on the same chromosome_index. This means if 2 edges are
457        // both "chromosome index 0", we keep the newer one.
458        let mut root_nodes = HashSet::new();
459        let mut edges_to_remove: Vec<(GraphNode, GraphNode)> = vec![];
460        for node in graph.nodes() {
461            if node.node_id == PATH_START_NODE_ID {
462                root_nodes.insert(node);
463            }
464            let mut edges_by_ci: HashMap<i64, (GraphNode, GraphNode, i64)> = HashMap::new();
465            for (source_node, target_node, edge_weights) in graph.edges(node) {
466                for edge_weight in edge_weights {
467                    if edge_weight.chromosome_index == NO_CHROMOSOME_INDEX {
468                        continue;
469                    }
470                    if edge_weight.chromosome_index == INDETERMINATE_CHROMOSOME_INDEX {
471                        continue;
472                    }
473                    if edge_weight.chromosome_index == PRESERVE_EDIT_SITE_CHROMOSOME_INDEX {
474                        edges_to_remove.push((source_node, target_node));
475                        continue;
476                    }
477                    edges_by_ci
478                        .entry(edge_weight.chromosome_index)
479                        .and_modify(|(source, target, created_on)| {
480                            if edge_weight.created_on > *created_on {
481                                edges_to_remove.push((*source, *target));
482                                *source = source_node;
483                                *target = target_node;
484                                *created_on = edge_weight.created_on;
485                            } else {
486                                edges_to_remove.push((source_node, target_node));
487                            }
488                        })
489                        .or_insert((source_node, target_node, edge_weight.created_on));
490                }
491            }
492        }
493
494        for (source, target) in edges_to_remove.iter() {
495            graph.remove_edge(*source, *target);
496        }
497
498        let reachable_nodes = all_reachable_nodes(&*graph, &Vec::from_iter(root_nodes));
499        let mut to_remove = vec![];
500        for node in graph.nodes() {
501            if !reachable_nodes.contains(&node) {
502                to_remove.push(node);
503            }
504        }
505        for node in to_remove {
506            graph.remove_node(node);
507        }
508    }
509
510    pub fn get_all_sequences(
511        conn: &GraphConnection,
512        block_group_id: &HashId,
513        _prune: bool,
514    ) -> HashSet<String> {
515        let edges = BlockGroupEdge::edges_for_block_group(conn, block_group_id)
516            .into_iter()
517            .filter(|edge| edge.chromosome_index != PRESERVE_EDIT_SITE_CHROMOSOME_INDEX)
518            .collect::<Vec<_>>();
519        let blocks = Edge::blocks_from_edges(conn, &edges);
520
521        let (mut graph, _) = Edge::build_graph(&edges, &blocks);
522        BlockGroup::prune_graph(&mut graph);
523
524        let mut start_nodes = vec![];
525        let mut end_nodes = vec![];
526        for node in graph.nodes() {
527            if is_start_node(node.node_id) {
528                start_nodes.push(node);
529            } else if is_end_node(node.node_id) {
530                end_nodes.push(node);
531            }
532        }
533        let blocks_by_node = blocks
534            .iter()
535            .map(|block| {
536                (
537                    GraphNode {
538                        node_id: block.node_id,
539                        sequence_start: block.start,
540                        sequence_end: block.end,
541                    },
542                    block,
543                )
544            })
545            .collect::<HashMap<GraphNode, &GroupBlock>>();
546        let mut sequences = HashSet::<String>::new();
547
548        for start_node in start_nodes {
549            for end_node in &end_nodes {
550                // TODO: maybe make all_simple_paths return a single path id where start == end
551                if start_node == *end_node {
552                    let block = blocks_by_node.get(&start_node).unwrap();
553                    if block.node_id != PATH_START_NODE_ID && block.node_id != PATH_END_NODE_ID {
554                        sequences.insert(block.sequence());
555                    }
556                } else {
557                    for path in all_simple_paths(&graph, start_node, *end_node) {
558                        let mut current_sequence = "".to_string();
559                        for node in path {
560                            let block = blocks_by_node.get(&node).unwrap();
561                            let block_sequence = block.sequence();
562                            current_sequence.push_str(&block_sequence);
563                        }
564                        sequences.insert(current_sequence);
565                    }
566                }
567            }
568        }
569
570        sequences
571    }
572
573    pub fn add_accession(
574        conn: &GraphConnection,
575        path: &Path,
576        name: &str,
577        start: i64,
578        end: i64,
579        cache: &mut PathCache,
580    ) -> Accession {
581        let tree = PathCache::get_intervaltree(cache, path);
582        let start_blocks: Vec<&NodeIntervalBlock> =
583            tree.query_point(start).map(|x| &x.value).collect();
584        assert_eq!(start_blocks.len(), 1);
585        let start_block = start_blocks[0];
586        let end_blocks: Vec<&NodeIntervalBlock> = tree.query_point(end).map(|x| &x.value).collect();
587        assert_eq!(end_blocks.len(), 1);
588        let end_block = end_blocks[0];
589        // we make a start/end edge for the accession start/end, then fill in the middle
590        // with any existing edges
591        let start_edge = AccessionEdgeData {
592            source_node_id: PATH_START_NODE_ID,
593            source_coordinate: -1,
594            source_strand: Strand::Forward,
595            target_node_id: start_block.node_id,
596            target_coordinate: start - start_block.start + start_block.sequence_start,
597            target_strand: Strand::Forward,
598            chromosome_index: 0,
599        };
600        let end_edge = AccessionEdgeData {
601            source_node_id: end_block.node_id,
602            source_coordinate: end - end_block.start + end_block.sequence_start,
603            source_strand: Strand::Forward,
604            target_node_id: PATH_END_NODE_ID,
605            target_coordinate: -1,
606            target_strand: Strand::Forward,
607            chromosome_index: 0,
608        };
609        let accession =
610            Accession::create(conn, name, &path.id, None).expect("Unable to create accession.");
611        let mut path_edges = vec![start_edge];
612        if start_block == end_block {
613            path_edges.push(end_edge);
614        } else {
615            let mut in_range = false;
616            let path_blocks: Vec<&NodeIntervalBlock> = tree
617                .iter_sorted()
618                .map(|x| &x.value)
619                .filter(|block| {
620                    if *block == start_block {
621                        in_range = true;
622                    } else if *block == end_block {
623                        in_range = false;
624                        return true;
625                    }
626                    in_range
627                })
628                .collect::<Vec<_>>();
629            // if start and end block are not the same, we will always have at least 2 elements in path_blocks
630            for (block, next_block) in path_blocks.iter().zip(path_blocks[1..].iter()) {
631                path_edges.push(AccessionEdgeData {
632                    source_node_id: block.node_id,
633                    source_coordinate: block.sequence_end,
634                    source_strand: block.strand,
635                    target_node_id: next_block.node_id,
636                    target_coordinate: next_block.sequence_start,
637                    target_strand: next_block.strand,
638                    chromosome_index: 0,
639                })
640            }
641            path_edges.push(end_edge);
642        }
643        AccessionPath::create(
644            conn,
645            &accession.id,
646            &AccessionEdge::bulk_create(conn, &path_edges),
647        );
648        accession
649    }
650
651    pub fn insert_changes(
652        conn: &GraphConnection,
653        changes: &[PathChange],
654        cache: &mut PathCache,
655        modify_blockgroup: bool,
656    ) -> Result<(), ChangeError> {
657        let mut new_augmented_edges_by_block_group =
658            HashMap::<&HashId, Vec<AugmentedEdgeData>>::new();
659        let mut new_accession_edges = HashMap::new();
660        let mut tree_map = HashMap::new();
661        for change in changes {
662            let tree = if modify_blockgroup {
663                tree_map.entry(change.block_group_id).or_insert_with(|| {
664                    BlockGroup::intervaltree_for(conn, &change.block_group_id, true)
665                })
666            } else {
667                PathCache::get_intervaltree(cache, &change.path)
668            };
669            let new_augmented_edges = BlockGroup::set_up_new_edges(change, tree)?;
670            new_augmented_edges_by_block_group
671                .entry(&change.block_group_id)
672                .and_modify(|new_edge_data| new_edge_data.extend(new_augmented_edges.clone()))
673                .or_insert_with(|| new_augmented_edges.clone());
674            if let Some(accession) = &change.path_accession {
675                new_accession_edges
676                    .entry((&change.path, accession))
677                    .and_modify(|new_edge_data: &mut Vec<AugmentedEdgeData>| {
678                        new_edge_data.extend(new_augmented_edges.clone())
679                    })
680                    .or_insert_with(|| new_augmented_edges.clone());
681            }
682        }
683
684        let mut edge_data_map = HashMap::new();
685
686        for (block_group_id, new_augmented_edges) in new_augmented_edges_by_block_group {
687            let new_edges = new_augmented_edges
688                .iter()
689                .map(|augmented_edge| augmented_edge.edge_data)
690                .collect::<Vec<_>>();
691            let edge_ids = Edge::bulk_create(conn, &new_edges);
692            for (i, edge_data) in new_edges.iter().enumerate() {
693                edge_data_map.insert(*edge_data, edge_ids[i]);
694            }
695            let new_block_group_edges = edge_ids
696                .iter()
697                .enumerate()
698                .map(|(i, edge_id)| BlockGroupEdgeData {
699                    block_group_id: *block_group_id,
700                    edge_id: *edge_id,
701                    chromosome_index: new_augmented_edges[i].chromosome_index,
702                    phased: new_augmented_edges[i].phased,
703                })
704                .collect::<Vec<_>>();
705            BlockGroupEdge::bulk_create(conn, &new_block_group_edges);
706        }
707
708        for ((path, accession_name), path_edges) in new_accession_edges {
709            match Accession::get(
710                conn,
711                "select * from accessions where name = ?1 AND path_id = ?2",
712                params![accession_name, path.id],
713            ) {
714                Ok(_) => {
715                    println!(
716                        "accession already exists, consider a better matching algorithm to determine if this is an error."
717                    );
718                }
719                Err(_) => {
720                    let acc_edges = AccessionEdge::bulk_create(
721                        conn,
722                        &path_edges
723                            .iter()
724                            .map(AccessionEdgeData::from)
725                            .collect::<Vec<_>>(),
726                    );
727                    let acc = Accession::create(conn, accession_name, &path.id, None)
728                        .expect("Accession could not be created.");
729                    AccessionPath::create(conn, &acc.id, &acc_edges);
730                }
731            }
732        }
733        Ok(())
734    }
735
736    #[allow(clippy::ptr_arg)]
737    #[allow(clippy::needless_late_init)]
738    pub fn insert_change(
739        conn: &GraphConnection,
740        change: &PathChange,
741        tree: &IntervalTree<i64, NodeIntervalBlock>,
742    ) -> Result<(), ChangeError> {
743        let new_augmented_edges = BlockGroup::set_up_new_edges(change, tree)?;
744        let new_edges = new_augmented_edges
745            .iter()
746            .map(|augmented_edge| augmented_edge.edge_data)
747            .collect::<Vec<_>>();
748        let edge_ids = Edge::bulk_create(conn, &new_edges);
749        let new_block_group_edges = edge_ids
750            .iter()
751            .enumerate()
752            .map(|(i, edge_id)| BlockGroupEdgeData {
753                block_group_id: change.block_group_id,
754                edge_id: *edge_id,
755                chromosome_index: new_augmented_edges[i].chromosome_index,
756                phased: new_augmented_edges[i].phased,
757            })
758            .collect::<Vec<_>>();
759        BlockGroupEdge::bulk_create(conn, &new_block_group_edges);
760        Ok(())
761    }
762
763    fn set_up_new_edges(
764        change: &PathChange,
765        tree: &IntervalTree<i64, NodeIntervalBlock>,
766    ) -> Result<Vec<AugmentedEdgeData>, ChangeError> {
767        let start_blocks: Vec<&NodeIntervalBlock> =
768            tree.query_point(change.start).map(|x| &x.value).collect();
769        assert_eq!(start_blocks.len(), 1);
770        // NOTE: This may not be used but needs to be initialized here instead of inside the if
771        // statement that uses it, so that the borrow checker is happy
772        let previous_start_blocks: Vec<&NodeIntervalBlock> = tree
773            .query_point(change.start - 1)
774            .map(|x| &x.value)
775            .collect();
776        assert_eq!(previous_start_blocks.len(), 1);
777        let start_block = if start_blocks[0].start == change.start {
778            // First part of this block will be replaced/deleted, need to get previous block to add
779            // edge including it
780            previous_start_blocks[0]
781        } else {
782            start_blocks[0]
783        };
784
785        // Ensure the change is within the path bounds. The logic here is a bit backwards, where
786        // we check if the start is before the start block's end and the end is before the end
787        // block's start. This is because the terminal blocks start and end at the bounds of the
788        // interval tree. So while it's ok to have a start/end block be the start/end block (for
789        // changes at the extremes, it's not ok for the change to start beyond the current
790        // boundaries.
791        if is_start_node(start_block.node_id) && change.start < start_block.end {
792            return Err(ChangeError::OutOfBounds(format!(
793                "Invalid change specified. Coordinate {pos} is before start of path range ({path_pos}).",
794                pos = change.start,
795                path_pos = start_block.end
796            )));
797        }
798        let end_blocks: Vec<&NodeIntervalBlock> =
799            tree.query_point(change.end).map(|x| &x.value).collect();
800        assert_eq!(end_blocks.len(), 1);
801        let end_block = end_blocks[0];
802
803        if is_end_node(end_block.node_id) && change.end > end_block.start {
804            return Err(ChangeError::OutOfBounds(format!(
805                "Invalid change specified. Coordinate {pos} is before start of path range ({path_pos}).",
806                pos = change.end,
807                path_pos = end_block.start
808            )));
809        }
810
811        let mut new_edges = vec![];
812
813        if change.block.sequence_start == change.block.sequence_end {
814            // Deletion
815            let source_coordinate = change.start - start_block.start + start_block.sequence_start;
816            let target_coordinate = change.end - end_block.start + end_block.sequence_start;
817            let mut aug_edges = vec![];
818            let new_edge = EdgeData {
819                source_node_id: start_block.node_id,
820                source_coordinate,
821                source_strand: Strand::Forward,
822                target_node_id: end_block.node_id,
823                target_coordinate,
824                target_strand: Strand::Forward,
825            };
826            aug_edges.push(AugmentedEdgeData {
827                edge_data: new_edge,
828                chromosome_index: change.chromosome_index,
829                phased: change.phased,
830            });
831
832            // NOTE: If the deletion is happening at the very beginning of a path, we need to add
833            // an edge from the dedicated start node to the end of the deletion, to indicate it's
834            // another start point in the block group DAG.
835            if change.start == 0 {
836                let target_coordinate = change.end - end_block.start + end_block.sequence_start;
837                let new_beginning_edge = EdgeData {
838                    source_node_id: PATH_START_NODE_ID,
839                    source_coordinate: 0,
840                    source_strand: Strand::Forward,
841                    target_node_id: end_block.node_id,
842                    target_coordinate,
843                    target_strand: Strand::Forward,
844                };
845                aug_edges.push(AugmentedEdgeData {
846                    edge_data: new_beginning_edge,
847                    chromosome_index: change.chromosome_index,
848                    phased: change.phased,
849                });
850                if !is_terminal(end_block.node_id) {
851                    new_edges.push(AugmentedEdgeData {
852                        edge_data: EdgeData {
853                            source_node_id: end_block.node_id,
854                            source_coordinate: target_coordinate,
855                            source_strand: Strand::Forward,
856                            target_node_id: end_block.node_id,
857                            target_coordinate,
858                            target_strand: Strand::Forward,
859                        },
860                        chromosome_index: if change.preserve_edge {
861                            0
862                        } else {
863                            PRESERVE_EDIT_SITE_CHROMOSOME_INDEX
864                        },
865                        phased: 0,
866                    });
867                }
868            } else {
869                if !is_terminal(start_block.node_id) {
870                    new_edges.push(AugmentedEdgeData {
871                        edge_data: EdgeData {
872                            source_node_id: start_block.node_id,
873                            source_coordinate,
874                            source_strand: Strand::Forward,
875                            target_node_id: start_block.node_id,
876                            target_coordinate: source_coordinate,
877                            target_strand: Strand::Forward,
878                        },
879                        chromosome_index: if change.preserve_edge {
880                            0
881                        } else {
882                            PRESERVE_EDIT_SITE_CHROMOSOME_INDEX
883                        },
884                        phased: 0,
885                    });
886                };
887                if !is_terminal(end_block.node_id) {
888                    new_edges.push(AugmentedEdgeData {
889                        edge_data: EdgeData {
890                            source_node_id: end_block.node_id,
891                            source_coordinate: target_coordinate,
892                            source_strand: Strand::Forward,
893                            target_node_id: end_block.node_id,
894                            target_coordinate,
895                            target_strand: Strand::Forward,
896                        },
897                        chromosome_index: if change.preserve_edge {
898                            0
899                        } else {
900                            PRESERVE_EDIT_SITE_CHROMOSOME_INDEX
901                        },
902                        phased: 0,
903                    });
904                }
905            }
906            new_edges.extend(aug_edges);
907        // NOTE: If the deletion is happening at the very end of a path, we might add an edge
908        // from the beginning of the deletion to the dedicated end node, but in practice it
909        // doesn't affect sequence readouts, so it may not be worth it.
910        } else {
911            // Insertion/replacement
912            let insertion_start_coordinate =
913                change.start - start_block.start + start_block.sequence_start;
914            let new_start_edge = EdgeData {
915                source_node_id: start_block.node_id,
916                source_coordinate: insertion_start_coordinate,
917                source_strand: Strand::Forward,
918                target_node_id: change.block.node_id,
919                target_coordinate: change.block.sequence_start,
920                target_strand: Strand::Forward,
921            };
922            let new_augmented_start_edge = AugmentedEdgeData {
923                edge_data: new_start_edge,
924                chromosome_index: change.chromosome_index,
925                phased: change.phased,
926            };
927            let insertion_end_coordinate = change.end - end_block.start + end_block.sequence_start;
928            let new_end_edge = EdgeData {
929                source_node_id: change.block.node_id,
930                source_coordinate: change.block.sequence_end,
931                source_strand: Strand::Forward,
932                target_node_id: end_block.node_id,
933                target_coordinate: insertion_end_coordinate,
934                target_strand: Strand::Forward,
935            };
936            let new_augmented_end_edge = AugmentedEdgeData {
937                edge_data: new_end_edge,
938                chromosome_index: change.chromosome_index,
939                phased: change.phased,
940            };
941
942            if change.start == 0 {
943                new_edges.push(AugmentedEdgeData {
944                    edge_data: EdgeData {
945                        source_node_id: PATH_START_NODE_ID,
946                        source_coordinate: 0,
947                        source_strand: Strand::Forward,
948                        target_node_id: change.block.node_id,
949                        target_coordinate: change.block.sequence_start,
950                        target_strand: Strand::Forward,
951                    },
952                    chromosome_index: change.chromosome_index,
953                    phased: 0,
954                });
955            }
956
957            if !is_terminal(start_block.node_id) {
958                new_edges.push(AugmentedEdgeData {
959                    edge_data: EdgeData {
960                        source_node_id: start_block.node_id,
961                        source_coordinate: insertion_start_coordinate,
962                        source_strand: Strand::Forward,
963                        target_node_id: start_block.node_id,
964                        target_coordinate: insertion_start_coordinate,
965                        target_strand: Strand::Forward,
966                    },
967                    chromosome_index: if change.preserve_edge {
968                        0
969                    } else {
970                        PRESERVE_EDIT_SITE_CHROMOSOME_INDEX
971                    },
972                    phased: 0,
973                });
974            }
975            if !is_terminal(end_block.node_id) {
976                new_edges.push(AugmentedEdgeData {
977                    edge_data: EdgeData {
978                        source_node_id: end_block.node_id,
979                        source_coordinate: insertion_end_coordinate,
980                        source_strand: Strand::Forward,
981                        target_node_id: end_block.node_id,
982                        target_coordinate: insertion_end_coordinate,
983                        target_strand: Strand::Forward,
984                    },
985                    chromosome_index: if change.preserve_edge {
986                        0
987                    } else {
988                        PRESERVE_EDIT_SITE_CHROMOSOME_INDEX
989                    },
990                    phased: 0,
991                });
992            }
993
994            new_edges.push(new_augmented_start_edge);
995            new_edges.push(new_augmented_end_edge);
996        }
997
998        Ok(new_edges)
999    }
1000
1001    pub fn intervaltree_for(
1002        conn: &GraphConnection,
1003        block_group_id: &HashId,
1004        remove_ambiguous_positions: bool,
1005    ) -> IntervalTree<i64, NodeIntervalBlock> {
1006        // make a tree where every node has a span in the graph.
1007        let mut graph = BlockGroup::get_graph(conn, block_group_id);
1008        BlockGroup::prune_graph(&mut graph);
1009        flatten_to_interval_tree(&graph, remove_ambiguous_positions)
1010    }
1011
1012    pub fn get_current_path(conn: &GraphConnection, block_group_id: &HashId) -> Path {
1013        let paths = Path::query(
1014            conn,
1015            "SELECT * FROM paths WHERE block_group_id = ?1 ORDER BY created_on DESC",
1016            params![block_group_id],
1017        );
1018        paths[0].clone()
1019    }
1020
1021    pub fn get_path_by_name(
1022        conn: &GraphConnection,
1023        block_group_id: &HashId,
1024        path_name: &str,
1025    ) -> Option<Path> {
1026        let paths = Path::query(
1027            conn,
1028            "SELECT * FROM paths WHERE block_group_id = ?1 ORDER BY created_on DESC",
1029            params![block_group_id],
1030        );
1031
1032        for path in &paths {
1033            if path.name == path_name {
1034                return Some(path.clone());
1035            }
1036        }
1037
1038        None
1039    }
1040
1041    #[allow(clippy::too_many_arguments)]
1042    pub fn derive_subgraph(
1043        conn: &GraphConnection,
1044        collection_name: &str,
1045        child_sample_name: &str,
1046        source_block_group_id: &HashId,
1047        start_block: &NodeIntervalBlock,
1048        end_block: &NodeIntervalBlock,
1049        start_node_coordinate: i64,
1050        end_node_coordinate: i64,
1051        target_block_group_id: &HashId,
1052        create_terminal_edges: bool,
1053    ) -> HashMap<HashId, HashId> {
1054        let current_graph = BlockGroup::get_graph(conn, source_block_group_id);
1055        let start_node = current_graph
1056            .nodes()
1057            .find(|node| {
1058                node.node_id == start_block.node_id
1059                    && node.sequence_start <= start_node_coordinate
1060                    && node.sequence_end >= start_node_coordinate
1061            })
1062            .unwrap();
1063        let end_node = current_graph
1064            .nodes()
1065            .find(|node| {
1066                node.node_id == end_block.node_id
1067                    && node.sequence_start <= end_node_coordinate
1068                    && node.sequence_end >= end_node_coordinate
1069            })
1070            .unwrap();
1071        let subgraph_edges = all_intermediate_edges(&current_graph, start_node, end_node);
1072
1073        // Filter out internal edges (boundary edges) that don't exist in the database
1074        let subgraph_edge_ids = subgraph_edges
1075            .iter()
1076            .map(|(_to, _from, edge_info)| edge_info[0].edge_id)
1077            .collect::<Vec<_>>();
1078        let source_edges = Edge::query_by_ids(conn, &subgraph_edge_ids);
1079
1080        // Instead of reusing the existing edges, we create copies of the nodes and edges.  This
1081        // makes it easier to recombine subgraphs later.
1082        // TODO: Annotation support. We do not reuse node ids because nodes control the graph
1083        // topology. If we duplicated nodes via these operations, it would leave to unintentional
1084        // cycles. A node cannot exist in 2 places. This will be non-intuitive for annotation
1085        // propagation and needs to be handled.
1086        let mut old_node_ids = HashSet::new();
1087        for source_edge in &source_edges {
1088            old_node_ids.insert(source_edge.source_node_id);
1089            old_node_ids.insert(source_edge.target_node_id);
1090        }
1091
1092        old_node_ids.insert(start_block.node_id);
1093        old_node_ids.insert(end_block.node_id);
1094
1095        let old_nodes = Node::query_by_ids(conn, &old_node_ids);
1096        let old_nodes_by_id = old_nodes
1097            .iter()
1098            .map(|node| (node.id, node))
1099            .collect::<HashMap<_, _>>();
1100
1101        let mut new_node_ids_by_old = HashMap::new();
1102        let old_start_node = old_nodes_by_id.get(&start_block.node_id).unwrap();
1103        let new_start_node_hash = HashId(calculate_hash(&format!(
1104            "{}.{}.{}.bg-{}",
1105            collection_name, child_sample_name, old_start_node.id, target_block_group_id
1106        )));
1107        new_node_ids_by_old.insert(
1108            old_start_node.id,
1109            Node::create(conn, &old_start_node.sequence_hash, &new_start_node_hash),
1110        );
1111        let old_end_node = old_nodes_by_id.get(&end_block.node_id).unwrap();
1112        let new_end_node_hash = HashId(calculate_hash(&format!(
1113            "{}.{}.{}.bg-{}",
1114            collection_name, child_sample_name, old_end_node.id, target_block_group_id
1115        )));
1116        new_node_ids_by_old.insert(
1117            old_end_node.id,
1118            Node::create(conn, &old_end_node.sequence_hash, &new_end_node_hash),
1119        );
1120
1121        let mut new_edges = vec![];
1122        for source_edge in &source_edges {
1123            let new_source_node_id = *new_node_ids_by_old
1124                .entry(source_edge.source_node_id)
1125                .or_insert_with(|| {
1126                    let old_source_node = old_nodes_by_id.get(&source_edge.source_node_id).unwrap();
1127                    let new_source_node_hash = HashId(calculate_hash(&format!(
1128                        "{}.{}.{}.bg-{}",
1129                        collection_name,
1130                        child_sample_name,
1131                        old_source_node.id,
1132                        target_block_group_id
1133                    )));
1134                    Node::create(conn, &old_source_node.sequence_hash, &new_source_node_hash)
1135                });
1136            let new_target_node_id = *new_node_ids_by_old
1137                .entry(source_edge.target_node_id)
1138                .or_insert_with(|| {
1139                    let old_target_node = old_nodes_by_id.get(&source_edge.target_node_id).unwrap();
1140                    let new_target_node_hash = HashId(calculate_hash(&format!(
1141                        "{}.{}.{}.bg-{}",
1142                        collection_name,
1143                        child_sample_name,
1144                        old_target_node.id,
1145                        target_block_group_id
1146                    )));
1147                    Node::create(conn, &old_target_node.sequence_hash, &new_target_node_hash)
1148                });
1149
1150            new_edges.push(EdgeData {
1151                source_node_id: new_source_node_id,
1152                source_coordinate: source_edge.source_coordinate,
1153                source_strand: source_edge.source_strand,
1154                target_node_id: new_target_node_id,
1155                target_coordinate: source_edge.target_coordinate,
1156                target_strand: source_edge.target_strand,
1157            });
1158        }
1159
1160        let new_edge_ids = Edge::bulk_create(conn, &new_edges);
1161        let new_edge_ids_by_old = source_edges
1162            .iter()
1163            .zip(new_edge_ids.iter())
1164            .map(|(source_edge, new_edge_id)| (&source_edge.id, new_edge_id))
1165            .collect::<HashMap<_, _>>();
1166
1167        let source_block_group_edges = BlockGroupEdge::specific_edges_for_block_group(
1168            conn,
1169            source_block_group_id,
1170            &subgraph_edge_ids,
1171        );
1172        let source_edge_ids = source_edges
1173            .iter()
1174            .map(|edge| &edge.id)
1175            .collect::<HashSet<_>>();
1176        let source_block_group_edges = source_block_group_edges
1177            .iter()
1178            .filter(|block_group_edge| source_edge_ids.contains(&block_group_edge.edge.id))
1179            .collect::<Vec<_>>();
1180        let source_block_group_edges_by_edge_id = source_block_group_edges
1181            .iter()
1182            .map(|block_group_edge| (&block_group_edge.edge.id, block_group_edge))
1183            .collect::<HashMap<_, _>>();
1184
1185        let subgraph_edge_inputs = source_block_group_edges
1186            .iter()
1187            .map(|edge| {
1188                let block_group_edge = source_block_group_edges_by_edge_id
1189                    .get(&edge.edge.id)
1190                    .unwrap();
1191                let new_edge_id = new_edge_ids_by_old.get(&edge.edge.id).unwrap();
1192                BlockGroupEdgeData {
1193                    block_group_id: *target_block_group_id,
1194                    edge_id: **new_edge_id,
1195                    chromosome_index: block_group_edge.chromosome_index,
1196                    phased: block_group_edge.phased,
1197                }
1198            })
1199            .collect::<Vec<_>>();
1200
1201        let mut all_edges = subgraph_edge_inputs.clone();
1202
1203        if create_terminal_edges {
1204            let new_start_node_id = new_node_ids_by_old.get(&start_block.node_id).unwrap();
1205            let new_start_edge = Edge::create(
1206                conn,
1207                PATH_START_NODE_ID,
1208                0,
1209                Strand::Forward,
1210                *new_start_node_id,
1211                start_node_coordinate,
1212                start_block.strand,
1213            );
1214            let new_start_edge_data = BlockGroupEdgeData {
1215                block_group_id: *target_block_group_id,
1216                edge_id: new_start_edge.id,
1217                chromosome_index: 0,
1218                phased: 0,
1219            };
1220            let new_end_node_id = new_node_ids_by_old.get(&end_block.node_id).unwrap();
1221            let new_end_edge = Edge::create(
1222                conn,
1223                *new_end_node_id,
1224                end_node_coordinate,
1225                end_block.strand,
1226                PATH_END_NODE_ID,
1227                0,
1228                Strand::Forward,
1229            );
1230            let new_end_edge_data = BlockGroupEdgeData {
1231                block_group_id: *target_block_group_id,
1232                edge_id: new_end_edge.id,
1233                chromosome_index: 0,
1234                phased: 0,
1235            };
1236
1237            all_edges.push(new_start_edge_data);
1238            all_edges.push(new_end_edge_data);
1239        }
1240
1241        BlockGroupEdge::bulk_create(conn, &all_edges);
1242
1243        new_node_ids_by_old
1244    }
1245}
1246
1247impl Query for BlockGroup {
1248    type Model = BlockGroup;
1249
1250    const TABLE_NAME: &'static str = "block_groups";
1251
1252    fn process_row(row: &Row) -> Self::Model {
1253        BlockGroup {
1254            id: row.get(0).unwrap(),
1255            collection_name: row.get(1).unwrap(),
1256            sample_name: row.get(2).unwrap(),
1257            name: row.get(3).unwrap(),
1258            created_on: row.get(4).unwrap(),
1259            parent_block_group_id: row.get(5).unwrap(),
1260            is_default: row.get(6).unwrap(),
1261        }
1262    }
1263}
1264
1265#[cfg(test)]
1266mod tests {
1267    use core::ops::Range;
1268    use std::collections::HashSet;
1269
1270    use capnp::message::TypedBuilder;
1271    use chrono::Utc;
1272    use gen_core::NO_CHROMOSOME_INDEX;
1273
1274    use super::*;
1275    use crate::{
1276        collection::Collection,
1277        node::Node,
1278        sample::Sample,
1279        sequence::Sequence,
1280        test_helpers::{create_bg, get_connection, interval_tree_verify, setup_block_group},
1281    };
1282
1283    fn get_single_bg_id(
1284        conn: &GraphConnection,
1285        collection_name: &str,
1286        sample_name: &str,
1287        group_name: &str,
1288        parent_samples: Vec<String>,
1289    ) -> HashId {
1290        BlockGroup::get_or_create_sample_block_groups(
1291            conn,
1292            collection_name,
1293            sample_name,
1294            group_name,
1295            parent_samples,
1296        )
1297        .unwrap()
1298        .into_iter()
1299        .next()
1300        .unwrap()
1301        .id
1302    }
1303
1304    #[test]
1305    fn test_capnp_serialization() {
1306        let block_group = BlockGroup {
1307            id: HashId::pad_str(42),
1308            collection_name: "test_collection".to_string(),
1309            sample_name: "test_sample".to_string(),
1310            name: "test_block_group".to_string(),
1311            created_on: Utc::now().timestamp_nanos_opt().unwrap(),
1312            parent_block_group_id: None,
1313            is_default: false,
1314        };
1315
1316        let mut message = TypedBuilder::<block_group::Owned>::new_default();
1317        let mut root = message.init_root();
1318        block_group.write_capnp(&mut root);
1319
1320        let deserialized = BlockGroup::read_capnp(root.into_reader());
1321        assert_eq!(block_group, deserialized);
1322    }
1323
1324    #[test]
1325    fn test_capnp_deserialization_defaults_missing_parent_to_none() {
1326        let created_on = Utc::now().timestamp_nanos_opt().unwrap();
1327
1328        let mut message = TypedBuilder::<block_group::Owned>::new_default();
1329        let mut root = message.init_root();
1330        root.set_id(&HashId::pad_str(42).0).unwrap();
1331        root.set_collection_name("test_collection");
1332        root.set_sample_name("test_sample");
1333        root.set_name("test_block_group");
1334        root.set_created_on(created_on);
1335
1336        let deserialized = BlockGroup::read_capnp(root.into_reader());
1337        assert_eq!(deserialized.parent_block_group_id, None);
1338        assert!(!deserialized.is_default);
1339    }
1340
1341    #[test]
1342    fn test_blockgroup_create() {
1343        let conn = &get_connection(None).unwrap();
1344        Collection::create(conn, "test");
1345        Sample::get_or_create(conn, Sample::DEFAULT_NAME);
1346        let bg1 = create_bg(conn, "test", Sample::DEFAULT_NAME, "hg19");
1347        assert_eq!(bg1.collection_name, "test");
1348        assert_eq!(bg1.name, "hg19");
1349        Sample::get_or_create(conn, "sample");
1350        let bg2 = create_bg(conn, "test", "sample", "hg19");
1351        assert_eq!(bg2.collection_name, "test");
1352        assert_eq!(bg2.name, "hg19");
1353        assert_eq!(bg2.sample_name, "sample".to_string());
1354        assert_ne!(&bg1.id, &bg2.id);
1355    }
1356
1357    #[test]
1358    fn test_blockgroup_delete() {
1359        let conn = &get_connection(None).unwrap();
1360        Collection::create(conn, "test");
1361        Sample::get_or_create(conn, "sample1");
1362        Sample::get_or_create(conn, "sample2");
1363        let bg1 = create_bg(conn, "test", "sample1", "hg19");
1364        let bg2 = create_bg(conn, "test", "sample2", "hg19");
1365
1366        BlockGroup::delete(conn, "test", &bg1.sample_name, &bg1.name);
1367
1368        let bgs = BlockGroup::all(conn);
1369        assert_eq!(bgs.len(), 1);
1370        assert_eq!(bgs[0], bg2);
1371    }
1372
1373    #[test]
1374    fn test_blockgroup_clone() {
1375        let conn = &get_connection(None).unwrap();
1376        Collection::create(conn, "test");
1377        Sample::get_or_create(conn, Sample::DEFAULT_NAME);
1378        let bg1 = create_bg(conn, "test", Sample::DEFAULT_NAME, "hg19");
1379        assert_eq!(bg1.collection_name, "test");
1380        assert_eq!(bg1.name, "hg19");
1381        Sample::get_or_create(conn, "sample");
1382        let bg2 = get_single_bg_id(
1383            conn,
1384            "test",
1385            "sample",
1386            "hg19",
1387            vec![Sample::DEFAULT_NAME.to_string()],
1388        );
1389        assert_eq!(
1390            BlockGroupEdge::edges_for_block_group(conn, &bg1.id),
1391            BlockGroupEdge::edges_for_block_group(conn, &bg2)
1392        );
1393    }
1394
1395    #[test]
1396    fn test_blockgroup_copies_immediate_parent_block_groups() {
1397        let conn = &get_connection(None).unwrap();
1398        Collection::create(conn, "test");
1399        Sample::get_or_create(conn, "parent_a");
1400        Sample::get_or_create(conn, "parent_b");
1401        Sample::get_or_create(conn, "child");
1402
1403        let parent_a_bg = create_bg(conn, "test", "parent_a", "chr1");
1404        let parent_b_bg = create_bg(conn, "test", "parent_b", "chr1");
1405
1406        let seq_a = Sequence::new()
1407            .sequence_type("DNA")
1408            .sequence("AAAA")
1409            .save(conn);
1410        let seq_b = Sequence::new()
1411            .sequence_type("DNA")
1412            .sequence("CCCC")
1413            .save(conn);
1414        let node_a = Node::create(conn, &seq_a.hash, &HashId::convert_str("merge-parent-a"));
1415        let node_b = Node::create(conn, &seq_b.hash, &HashId::convert_str("merge-parent-b"));
1416
1417        let parent_a_edges = [
1418            Edge::create(
1419                conn,
1420                PATH_START_NODE_ID,
1421                0,
1422                Strand::Forward,
1423                node_a,
1424                0,
1425                Strand::Forward,
1426            ),
1427            Edge::create(
1428                conn,
1429                node_a,
1430                4,
1431                Strand::Forward,
1432                PATH_END_NODE_ID,
1433                0,
1434                Strand::Forward,
1435            ),
1436        ];
1437        let parent_b_edges = [
1438            Edge::create(
1439                conn,
1440                PATH_START_NODE_ID,
1441                0,
1442                Strand::Forward,
1443                node_b,
1444                0,
1445                Strand::Forward,
1446            ),
1447            Edge::create(
1448                conn,
1449                node_b,
1450                4,
1451                Strand::Forward,
1452                PATH_END_NODE_ID,
1453                0,
1454                Strand::Forward,
1455            ),
1456        ];
1457
1458        BlockGroupEdge::bulk_create(
1459            conn,
1460            &parent_a_edges
1461                .iter()
1462                .map(|edge| BlockGroupEdgeData {
1463                    block_group_id: parent_a_bg.id,
1464                    edge_id: edge.id,
1465                    chromosome_index: 0,
1466                    phased: 0,
1467                })
1468                .collect::<Vec<_>>(),
1469        );
1470        BlockGroupEdge::bulk_create(
1471            conn,
1472            &parent_b_edges
1473                .iter()
1474                .map(|edge| BlockGroupEdgeData {
1475                    block_group_id: parent_b_bg.id,
1476                    edge_id: edge.id,
1477                    chromosome_index: 0,
1478                    phased: 0,
1479                })
1480                .collect::<Vec<_>>(),
1481        );
1482
1483        let child_block_groups = BlockGroup::get_or_create_sample_block_groups(
1484            conn,
1485            "test",
1486            "child",
1487            "chr1",
1488            vec!["parent_a".to_string(), "parent_b".to_string()],
1489        )
1490        .unwrap();
1491        assert_eq!(child_block_groups.len(), 2);
1492
1493        let child_by_parent = child_block_groups
1494            .iter()
1495            .map(|block_group| (block_group.parent_block_group_id.unwrap(), block_group))
1496            .collect::<HashMap<_, _>>();
1497
1498        let child_a = child_by_parent.get(&parent_a_bg.id).unwrap();
1499        let child_b = child_by_parent.get(&parent_b_bg.id).unwrap();
1500
1501        let child_a_edges = BlockGroupEdge::query(
1502            conn,
1503            "select * from block_group_edges where block_group_id = ?1",
1504            params![child_a.id],
1505        );
1506        let child_b_edges = BlockGroupEdge::query(
1507            conn,
1508            "select * from block_group_edges where block_group_id = ?1",
1509            params![child_b.id],
1510        );
1511        assert_eq!(
1512            child_a_edges
1513                .iter()
1514                .map(|edge| edge.edge_id)
1515                .collect::<HashSet<_>>(),
1516            parent_a_edges
1517                .iter()
1518                .map(|edge| edge.id)
1519                .collect::<HashSet<_>>()
1520        );
1521        assert_eq!(
1522            child_b_edges
1523                .iter()
1524                .map(|edge| edge.edge_id)
1525                .collect::<HashSet<_>>(),
1526            parent_b_edges
1527                .iter()
1528                .map(|edge| edge.id)
1529                .collect::<HashSet<_>>()
1530        );
1531
1532        assert_eq!(
1533            BlockGroup::get_all_sequences(conn, &child_a.id, false),
1534            HashSet::from_iter(vec!["AAAA".to_string()])
1535        );
1536        assert_eq!(
1537            BlockGroup::get_all_sequences(conn, &child_b.id, false),
1538            HashSet::from_iter(vec!["CCCC".to_string()])
1539        );
1540        assert_eq!(
1541            Sample::get_all_sequences(conn, "test", "child", false),
1542            HashSet::from_iter(vec!["AAAA".to_string(), "CCCC".to_string()])
1543        );
1544    }
1545
1546    #[test]
1547    fn test_get_or_create_sample_block_groups_creates_root_block_group_if_no_parents() {
1548        let conn = &get_connection(None).unwrap();
1549        Collection::create(conn, "test");
1550        Sample::get_or_create(conn, "root_sample");
1551
1552        let block_groups = BlockGroup::get_or_create_sample_block_groups(
1553            conn,
1554            "test",
1555            "root_sample",
1556            "chr1",
1557            vec![],
1558        )
1559        .unwrap();
1560
1561        assert_eq!(block_groups.len(), 1);
1562        assert_eq!(block_groups[0].collection_name, "test");
1563        assert_eq!(block_groups[0].sample_name, "root_sample");
1564        assert_eq!(block_groups[0].name, "chr1");
1565        assert!(block_groups[0].parent_block_group_id.is_none());
1566    }
1567
1568    #[test]
1569    fn test_get_or_create_sample_block_groups_seeds_from_parents_without_block_groups() {
1570        let conn = &get_connection(None).unwrap();
1571        Collection::create(conn, "test");
1572        Sample::get_or_create(conn, "parent_sample");
1573        Sample::get_or_create(conn, "child_sample");
1574
1575        let block_groups = BlockGroup::get_or_create_sample_block_groups(
1576            conn,
1577            "test",
1578            "child_sample",
1579            "chr1",
1580            vec!["parent_sample".to_string()],
1581        )
1582        .unwrap();
1583
1584        assert_eq!(block_groups.len(), 1);
1585        assert_eq!(block_groups[0].collection_name, "test");
1586        assert_eq!(block_groups[0].sample_name, "child_sample");
1587        assert_eq!(block_groups[0].name, "chr1");
1588        assert!(block_groups[0].parent_block_group_id.is_none());
1589        assert!(BlockGroupEdge::edges_for_block_group(conn, &block_groups[0].id).is_empty());
1590    }
1591
1592    #[test]
1593    fn test_blockgroup_merge_from_multiple_parents_preserves_paths_and_accessions() {
1594        let conn = &get_connection(None).unwrap();
1595        Collection::create(conn, "test");
1596        Sample::get_or_create(conn, "parent_a");
1597        Sample::get_or_create(conn, "parent_b");
1598        Sample::get_or_create(conn, "child");
1599
1600        let parent_a_bg = create_bg(conn, "test", "parent_a", "chr1");
1601        let parent_b_bg = create_bg(conn, "test", "parent_b", "chr1");
1602
1603        let seq_a = Sequence::new()
1604            .sequence_type("DNA")
1605            .sequence("AAAA")
1606            .save(conn);
1607        let seq_b = Sequence::new()
1608            .sequence_type("DNA")
1609            .sequence("CCCC")
1610            .save(conn);
1611        let node_a = Node::create(conn, &seq_a.hash, &HashId::convert_str("metadata-parent-a"));
1612        let node_b = Node::create(conn, &seq_b.hash, &HashId::convert_str("metadata-parent-b"));
1613
1614        let parent_a_edges = [
1615            Edge::create(
1616                conn,
1617                PATH_START_NODE_ID,
1618                0,
1619                Strand::Forward,
1620                node_a,
1621                0,
1622                Strand::Forward,
1623            ),
1624            Edge::create(
1625                conn,
1626                node_a,
1627                4,
1628                Strand::Forward,
1629                PATH_END_NODE_ID,
1630                0,
1631                Strand::Forward,
1632            ),
1633        ];
1634        let parent_b_edges = [
1635            Edge::create(
1636                conn,
1637                PATH_START_NODE_ID,
1638                0,
1639                Strand::Forward,
1640                node_b,
1641                0,
1642                Strand::Forward,
1643            ),
1644            Edge::create(
1645                conn,
1646                node_b,
1647                4,
1648                Strand::Forward,
1649                PATH_END_NODE_ID,
1650                0,
1651                Strand::Forward,
1652            ),
1653        ];
1654
1655        BlockGroupEdge::bulk_create(
1656            conn,
1657            &parent_a_edges
1658                .iter()
1659                .map(|edge| BlockGroupEdgeData {
1660                    block_group_id: parent_a_bg.id,
1661                    edge_id: edge.id,
1662                    chromosome_index: 0,
1663                    phased: 0,
1664                })
1665                .collect::<Vec<_>>(),
1666        );
1667        BlockGroupEdge::bulk_create(
1668            conn,
1669            &parent_b_edges
1670                .iter()
1671                .map(|edge| BlockGroupEdgeData {
1672                    block_group_id: parent_b_bg.id,
1673                    edge_id: edge.id,
1674                    chromosome_index: 0,
1675                    phased: 0,
1676                })
1677                .collect::<Vec<_>>(),
1678        );
1679
1680        let parent_a_path = Path::create(
1681            conn,
1682            "chr1",
1683            &parent_a_bg.id,
1684            &parent_a_edges
1685                .iter()
1686                .map(|edge| edge.id)
1687                .collect::<Vec<_>>(),
1688        );
1689        let parent_b_path = Path::create(
1690            conn,
1691            "chr1",
1692            &parent_b_bg.id,
1693            &parent_b_edges
1694                .iter()
1695                .map(|edge| edge.id)
1696                .collect::<Vec<_>>(),
1697        );
1698        let parent_b_alt_path = Path::create(
1699            conn,
1700            "chr1-alt",
1701            &parent_b_bg.id,
1702            &parent_b_edges
1703                .iter()
1704                .map(|edge| edge.id)
1705                .collect::<Vec<_>>(),
1706        );
1707
1708        let mut path_cache = PathCache::new(conn);
1709        let parent_a_path_len = parent_a_path.length(conn);
1710        let parent_b_path_len = parent_b_path.length(conn);
1711        let parent_b_alt_path_len = parent_b_alt_path.length(conn);
1712        BlockGroup::add_accession(
1713            conn,
1714            &parent_a_path,
1715            "parent-a-acc",
1716            0,
1717            parent_a_path_len,
1718            &mut path_cache,
1719        );
1720        BlockGroup::add_accession(
1721            conn,
1722            &parent_b_path,
1723            "parent-b-acc",
1724            0,
1725            parent_b_path_len,
1726            &mut path_cache,
1727        );
1728        BlockGroup::add_accession(
1729            conn,
1730            &parent_b_alt_path,
1731            "parent-b-alt-acc",
1732            0,
1733            parent_b_alt_path_len,
1734            &mut path_cache,
1735        );
1736
1737        let child_block_groups = BlockGroup::get_or_create_sample_block_groups(
1738            conn,
1739            "test",
1740            "child",
1741            "chr1",
1742            vec!["parent_a".to_string(), "parent_b".to_string()],
1743        )
1744        .unwrap();
1745        let child_by_parent = child_block_groups
1746            .iter()
1747            .map(|block_group| (block_group.parent_block_group_id.unwrap(), block_group))
1748            .collect::<HashMap<_, _>>();
1749        let child_a = child_by_parent.get(&parent_a_bg.id).unwrap();
1750        let child_b = child_by_parent.get(&parent_b_bg.id).unwrap();
1751
1752        let child_a_paths = Path::query(
1753            conn,
1754            "select * from paths where block_group_id = ?1 order by name",
1755            params![child_a.id],
1756        );
1757        assert_eq!(
1758            child_a_paths
1759                .iter()
1760                .map(|path| path.name.as_str())
1761                .collect::<Vec<_>>(),
1762            vec!["chr1"]
1763        );
1764
1765        let child_b_paths = Path::query(
1766            conn,
1767            "select * from paths where block_group_id = ?1 order by name",
1768            params![child_b.id],
1769        );
1770        assert_eq!(
1771            child_b_paths
1772                .iter()
1773                .map(|path| path.name.as_str())
1774                .collect::<Vec<_>>(),
1775            vec!["chr1", "chr1-alt"]
1776        );
1777
1778        let child_a_accessions = Accession::query(
1779            conn,
1780            "select accessions.* from accessions join paths on accessions.path_id = paths.id where paths.block_group_id = ?1 order by accessions.name",
1781            params![child_a.id],
1782        );
1783        assert_eq!(
1784            child_a_accessions
1785                .iter()
1786                .map(|accession| accession.name.as_str())
1787                .collect::<Vec<_>>(),
1788            vec!["parent-a-acc"]
1789        );
1790
1791        let child_b_accessions = Accession::query(
1792            conn,
1793            "select accessions.* from accessions join paths on accessions.path_id = paths.id where paths.block_group_id = ?1 order by accessions.name",
1794            params![child_b.id],
1795        );
1796        assert_eq!(
1797            child_b_accessions
1798                .iter()
1799                .map(|accession| accession.name.as_str())
1800                .collect::<Vec<_>>(),
1801            vec!["parent-b-acc", "parent-b-alt-acc"]
1802        );
1803    }
1804
1805    #[test]
1806    fn test_blockgroup_clone_passes_accessions() {
1807        let conn = &get_connection(None).unwrap();
1808        let (_bg_1, path) = setup_block_group(conn);
1809        let mut path_cache = PathCache::new(conn);
1810        let acc_1 = BlockGroup::add_accession(conn, &path, "test", 3, 7, &mut path_cache);
1811        assert_eq!(
1812            Accession::query(
1813                conn,
1814                "select * from accessions where name = ?1",
1815                rusqlite::params!(SQLValue::from("test".to_string())),
1816            ),
1817            vec![Accession {
1818                id: acc_1.id,
1819                name: "test".to_string(),
1820                path_id: path.id,
1821                parent_accession_id: None,
1822            }]
1823        );
1824
1825        Sample::get_or_create(conn, "sample2");
1826        let _bg2 = get_single_bg_id(conn, "test", "sample2", "chr1", vec!["test".to_string()]);
1827        assert_eq!(
1828            Accession::query(
1829                conn,
1830                "select * from accessions where name = ?1",
1831                rusqlite::params!(SQLValue::from("test".to_string())),
1832            )
1833            .len(),
1834            2
1835        );
1836    }
1837
1838    #[test]
1839    fn insert_and_deletion_get_all() {
1840        let conn = get_connection(None).unwrap();
1841        let (block_group_id, path) = setup_block_group(&conn);
1842        let insert_sequence = Sequence::new()
1843            .sequence_type("DNA")
1844            .sequence("NNNN")
1845            .save(&conn);
1846        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1847        let insert = PathBlock {
1848            node_id: insert_node_id,
1849            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1850            sequence_start: 0,
1851            sequence_end: 4,
1852            path_start: 7,
1853            path_end: 15,
1854            strand: Strand::Forward,
1855        };
1856        let change = PathChange {
1857            block_group_id,
1858            path: path.clone(),
1859            path_accession: None,
1860            start: 7,
1861            end: 15,
1862            block: insert,
1863            chromosome_index: 1,
1864            phased: 0,
1865            preserve_edge: true,
1866        };
1867        let tree = path.intervaltree(&conn);
1868        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1869
1870        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1871        assert_eq!(
1872            all_sequences,
1873            HashSet::from_iter(vec![
1874                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1875                "AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
1876            ])
1877        );
1878
1879        let deletion_sequence = Sequence::new()
1880            .sequence_type("DNA")
1881            .sequence("")
1882            .save(&conn);
1883        let deletion_node_id =
1884            Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("2"));
1885        let deletion = PathBlock {
1886            node_id: deletion_node_id,
1887            block_sequence: deletion_sequence.get_sequence(None, None),
1888            sequence_start: 0,
1889            sequence_end: 0,
1890            path_start: 19,
1891            path_end: 31,
1892            strand: Strand::Forward,
1893        };
1894
1895        let change = PathChange {
1896            block_group_id,
1897            path: path.clone(),
1898            path_accession: None,
1899            start: 19,
1900            end: 31,
1901            block: deletion,
1902            chromosome_index: 1,
1903            phased: 0,
1904            preserve_edge: true,
1905        };
1906        // take out an entire block.
1907        let tree = path.intervaltree(&conn);
1908        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1909        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1910        assert_eq!(
1911            all_sequences,
1912            HashSet::from_iter(vec![
1913                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1914                "AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1915                "AAAAAAAAAATTTTTTTTTGGGGGGGGG".to_string(),
1916                "AAAAAAANNNNTTTTGGGGGGGGG".to_string(),
1917            ])
1918        )
1919    }
1920
1921    #[test]
1922    fn simple_insert_get_all() {
1923        let conn = get_connection(None).unwrap();
1924        let (block_group_id, path) = setup_block_group(&conn);
1925        let insert_sequence = Sequence::new()
1926            .sequence_type("DNA")
1927            .sequence("NNNN")
1928            .save(&conn);
1929        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1930        let insert = PathBlock {
1931            node_id: insert_node_id,
1932            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1933            sequence_start: 0,
1934            sequence_end: 4,
1935            path_start: 7,
1936            path_end: 15,
1937            strand: Strand::Forward,
1938        };
1939        let change = PathChange {
1940            block_group_id,
1941            path: path.clone(),
1942            path_accession: None,
1943            start: 7,
1944            end: 15,
1945            block: insert,
1946            chromosome_index: 1,
1947            phased: 0,
1948            preserve_edge: true,
1949        };
1950        let tree = path.intervaltree(&conn);
1951        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1952
1953        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1954        assert_eq!(
1955            all_sequences,
1956            HashSet::from_iter(vec![
1957                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1958                "AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
1959            ])
1960        );
1961    }
1962
1963    #[test]
1964    fn insert_on_block_boundary_middle() {
1965        let conn = get_connection(None).unwrap();
1966        let (block_group_id, path) = setup_block_group(&conn);
1967        let insert_sequence = Sequence::new()
1968            .sequence_type("DNA")
1969            .sequence("NNNN")
1970            .save(&conn);
1971        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1972        let insert = PathBlock {
1973            node_id: insert_node_id,
1974            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1975            sequence_start: 0,
1976            sequence_end: 4,
1977            path_start: 15,
1978            path_end: 15,
1979            strand: Strand::Forward,
1980        };
1981        let change = PathChange {
1982            block_group_id,
1983            path: path.clone(),
1984            path_accession: None,
1985            start: 15,
1986            end: 15,
1987            block: insert,
1988            chromosome_index: 1,
1989            phased: 0,
1990            preserve_edge: true,
1991        };
1992        let tree = path.intervaltree(&conn);
1993        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1994
1995        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1996        assert_eq!(
1997            all_sequences,
1998            HashSet::from_iter(vec![
1999                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2000                "AAAAAAAAAATTTTTNNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
2001            ])
2002        );
2003    }
2004
2005    #[test]
2006    fn insert_within_block() {
2007        let conn = get_connection(None).unwrap();
2008        let (block_group_id, path) = setup_block_group(&conn);
2009        let insert_sequence = Sequence::new()
2010            .sequence_type("DNA")
2011            .sequence("NNNN")
2012            .save(&conn);
2013        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2014        let insert = PathBlock {
2015            node_id: insert_node_id,
2016            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2017            sequence_start: 0,
2018            sequence_end: 4,
2019            path_start: 12,
2020            path_end: 17,
2021            strand: Strand::Forward,
2022        };
2023        let change = PathChange {
2024            block_group_id,
2025            path: path.clone(),
2026            path_accession: None,
2027            start: 12,
2028            end: 17,
2029            block: insert,
2030            chromosome_index: 1,
2031            phased: 0,
2032            preserve_edge: true,
2033        };
2034        let tree = path.intervaltree(&conn);
2035        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2036
2037        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2038        assert_eq!(
2039            all_sequences,
2040            HashSet::from_iter(vec![
2041                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2042                "AAAAAAAAAATTNNNNTTTCCCCCCCCCCGGGGGGGGGG".to_string()
2043            ])
2044        );
2045    }
2046
2047    #[test]
2048    fn insert_on_block_boundary_start() {
2049        let conn = get_connection(None).unwrap();
2050        let (block_group_id, path) = setup_block_group(&conn);
2051        let insert_sequence = Sequence::new()
2052            .sequence_type("DNA")
2053            .sequence("NNNN")
2054            .save(&conn);
2055        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2056        let insert = PathBlock {
2057            node_id: insert_node_id,
2058            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2059            sequence_start: 0,
2060            sequence_end: 4,
2061            path_start: 10,
2062            path_end: 10,
2063            strand: Strand::Forward,
2064        };
2065        let change = PathChange {
2066            block_group_id,
2067            path: path.clone(),
2068            path_accession: None,
2069            start: 10,
2070            end: 10,
2071            block: insert,
2072            chromosome_index: 1,
2073            phased: 0,
2074            preserve_edge: true,
2075        };
2076        let tree = path.intervaltree(&conn);
2077        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2078
2079        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2080        assert_eq!(
2081            all_sequences,
2082            HashSet::from_iter(vec![
2083                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2084                "AAAAAAAAAANNNNTTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
2085            ])
2086        );
2087    }
2088
2089    #[test]
2090    fn insert_on_block_boundary_end() {
2091        let conn = get_connection(None).unwrap();
2092        let (block_group_id, path) = setup_block_group(&conn);
2093        let insert_sequence = Sequence::new()
2094            .sequence_type("DNA")
2095            .sequence("NNNN")
2096            .save(&conn);
2097        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2098        let insert = PathBlock {
2099            node_id: insert_node_id,
2100            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2101            sequence_start: 0,
2102            sequence_end: 4,
2103            path_start: 9,
2104            path_end: 9,
2105            strand: Strand::Forward,
2106        };
2107        let change = PathChange {
2108            block_group_id,
2109            path: path.clone(),
2110            path_accession: None,
2111            start: 9,
2112            end: 9,
2113            block: insert,
2114            chromosome_index: 1,
2115            phased: 0,
2116            preserve_edge: true,
2117        };
2118        let tree = path.intervaltree(&conn);
2119        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2120
2121        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2122        assert_eq!(
2123            all_sequences,
2124            HashSet::from_iter(vec![
2125                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2126                "AAAAAAAAANNNNATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
2127            ])
2128        );
2129    }
2130
2131    #[test]
2132    fn insert_across_entire_block_boundary() {
2133        let conn = get_connection(None).unwrap();
2134        let (block_group_id, path) = setup_block_group(&conn);
2135        let insert_sequence = Sequence::new()
2136            .sequence_type("DNA")
2137            .sequence("NNNN")
2138            .save(&conn);
2139        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2140        let insert = PathBlock {
2141            node_id: insert_node_id,
2142            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2143            sequence_start: 0,
2144            sequence_end: 4,
2145            path_start: 10,
2146            path_end: 20,
2147            strand: Strand::Forward,
2148        };
2149        let change = PathChange {
2150            block_group_id,
2151            path: path.clone(),
2152            path_accession: None,
2153            start: 10,
2154            end: 20,
2155            block: insert,
2156            chromosome_index: 1,
2157            phased: 0,
2158            preserve_edge: true,
2159        };
2160        let tree = path.intervaltree(&conn);
2161        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2162
2163        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2164        assert_eq!(
2165            all_sequences,
2166            HashSet::from_iter(vec![
2167                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2168                "AAAAAAAAAANNNNCCCCCCCCCCGGGGGGGGGG".to_string()
2169            ])
2170        );
2171    }
2172
2173    #[test]
2174    fn insert_across_two_blocks() {
2175        let conn = get_connection(None).unwrap();
2176        let (block_group_id, path) = setup_block_group(&conn);
2177        let insert_sequence = Sequence::new()
2178            .sequence_type("DNA")
2179            .sequence("NNNN")
2180            .save(&conn);
2181        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2182        let insert = PathBlock {
2183            node_id: insert_node_id,
2184            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2185            sequence_start: 0,
2186            sequence_end: 4,
2187            path_start: 15,
2188            path_end: 25,
2189            strand: Strand::Forward,
2190        };
2191        let change = PathChange {
2192            block_group_id,
2193            path: path.clone(),
2194            path_accession: None,
2195            start: 15,
2196            end: 25,
2197            block: insert,
2198            chromosome_index: 1,
2199            phased: 0,
2200            preserve_edge: true,
2201        };
2202        let tree = path.intervaltree(&conn);
2203        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2204
2205        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2206        assert_eq!(
2207            all_sequences,
2208            HashSet::from_iter(vec![
2209                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2210                "AAAAAAAAAATTTTTNNNNCCCCCGGGGGGGGGG".to_string()
2211            ])
2212        );
2213    }
2214
2215    #[test]
2216    fn insert_spanning_blocks() {
2217        let conn = get_connection(None).unwrap();
2218        let (block_group_id, path) = setup_block_group(&conn);
2219        let insert_sequence = Sequence::new()
2220            .sequence_type("DNA")
2221            .sequence("NNNN")
2222            .save(&conn);
2223        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2224        let insert = PathBlock {
2225            node_id: insert_node_id,
2226            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2227            sequence_start: 0,
2228            sequence_end: 4,
2229            path_start: 5,
2230            path_end: 35,
2231            strand: Strand::Forward,
2232        };
2233        let change = PathChange {
2234            block_group_id,
2235            path: path.clone(),
2236            path_accession: None,
2237            start: 5,
2238            end: 35,
2239            block: insert,
2240            chromosome_index: 1,
2241            phased: 0,
2242            preserve_edge: true,
2243        };
2244        let tree = path.intervaltree(&conn);
2245        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2246
2247        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2248        assert_eq!(
2249            all_sequences,
2250            HashSet::from_iter(vec![
2251                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2252                "AAAAANNNNGGGGG".to_string()
2253            ])
2254        );
2255    }
2256
2257    #[test]
2258    fn simple_deletion() {
2259        let conn = get_connection(None).unwrap();
2260        let (block_group_id, path) = setup_block_group(&conn);
2261        let deletion_sequence = Sequence::new()
2262            .sequence_type("DNA")
2263            .sequence("")
2264            .save(&conn);
2265        let deletion_node_id =
2266            Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("1"));
2267        let deletion = PathBlock {
2268            node_id: deletion_node_id,
2269            block_sequence: deletion_sequence.get_sequence(None, None),
2270            sequence_start: 0,
2271            sequence_end: 0,
2272            path_start: 19,
2273            path_end: 31,
2274            strand: Strand::Forward,
2275        };
2276
2277        let change = PathChange {
2278            block_group_id,
2279            path: path.clone(),
2280            path_accession: None,
2281            start: 19,
2282            end: 31,
2283            block: deletion,
2284            chromosome_index: 1,
2285            phased: 0,
2286            preserve_edge: true,
2287        };
2288
2289        // take out an entire block.
2290        let tree = path.intervaltree(&conn);
2291        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2292        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2293        assert_eq!(
2294            all_sequences,
2295            HashSet::from_iter(vec![
2296                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2297                "AAAAAAAAAATTTTTTTTTGGGGGGGGG".to_string(),
2298            ])
2299        )
2300    }
2301
2302    #[test]
2303    fn doesnt_apply_same_insert_twice() {
2304        let conn = get_connection(None).unwrap();
2305        let (block_group_id, path) = setup_block_group(&conn);
2306        let insert_sequence = Sequence::new()
2307            .sequence_type("DNA")
2308            .sequence("NNNN")
2309            .save(&conn);
2310        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2311        let insert = PathBlock {
2312            node_id: insert_node_id,
2313            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2314            sequence_start: 0,
2315            sequence_end: 4,
2316            path_start: 7,
2317            path_end: 15,
2318            strand: Strand::Forward,
2319        };
2320        let change = PathChange {
2321            block_group_id,
2322            path: path.clone(),
2323            path_accession: None,
2324            start: 7,
2325            end: 15,
2326            block: insert,
2327            chromosome_index: 1,
2328            phased: 0,
2329            preserve_edge: true,
2330        };
2331        let tree = path.intervaltree(&conn);
2332        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2333
2334        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2335        assert_eq!(
2336            all_sequences,
2337            HashSet::from_iter(vec![
2338                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2339                "AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
2340            ])
2341        );
2342
2343        let tree = path.intervaltree(&conn);
2344        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2345
2346        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2347        assert_eq!(
2348            all_sequences,
2349            HashSet::from_iter(vec![
2350                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2351                "AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
2352            ])
2353        );
2354    }
2355
2356    #[test]
2357    fn insert_at_beginning_of_path() {
2358        let conn = get_connection(None).unwrap();
2359        let (block_group_id, path) = setup_block_group(&conn);
2360        let insert_sequence = Sequence::new()
2361            .sequence_type("DNA")
2362            .sequence("NNNN")
2363            .save(&conn);
2364        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2365        let insert = PathBlock {
2366            node_id: insert_node_id,
2367            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2368            sequence_start: 0,
2369            sequence_end: 4,
2370            path_start: 0,
2371            path_end: 0,
2372            strand: Strand::Forward,
2373        };
2374        let change = PathChange {
2375            block_group_id,
2376            path: path.clone(),
2377            path_accession: None,
2378            start: 0,
2379            end: 0,
2380            block: insert,
2381            chromosome_index: 1,
2382            phased: 0,
2383            preserve_edge: true,
2384        };
2385        let tree = path.intervaltree(&conn);
2386        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2387
2388        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2389        assert_eq!(
2390            all_sequences,
2391            HashSet::from_iter(vec![
2392                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2393                "NNNNAAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2394            ])
2395        );
2396    }
2397
2398    #[test]
2399    fn homozygous_insert_at_beginning_of_path() {
2400        let conn = get_connection(None).unwrap();
2401        let (block_group_id, path) = setup_block_group(&conn);
2402        let insert_sequence = Sequence::new()
2403            .sequence_type("DNA")
2404            .sequence("NNNN")
2405            .save(&conn);
2406        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2407        let insert = PathBlock {
2408            node_id: insert_node_id,
2409            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2410            sequence_start: 0,
2411            sequence_end: 4,
2412            path_start: 0,
2413            path_end: 0,
2414            strand: Strand::Forward,
2415        };
2416        let change = PathChange {
2417            block_group_id,
2418            path: path.clone(),
2419            path_accession: None,
2420            start: 0,
2421            end: 0,
2422            block: insert,
2423            chromosome_index: 0,
2424            phased: 0,
2425            preserve_edge: true,
2426        };
2427        let tree = path.intervaltree(&conn);
2428        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2429
2430        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2431        assert_eq!(
2432            all_sequences,
2433            HashSet::from_iter(vec![
2434                "NNNNAAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2435            ])
2436        );
2437    }
2438
2439    #[test]
2440    fn insert_at_end_of_path() {
2441        let conn = get_connection(None).unwrap();
2442        let (block_group_id, path) = setup_block_group(&conn);
2443
2444        let insert_sequence = Sequence::new()
2445            .sequence_type("DNA")
2446            .sequence("NNNN")
2447            .save(&conn);
2448        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2449        let insert = PathBlock {
2450            node_id: insert_node_id,
2451            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2452            sequence_start: 0,
2453            sequence_end: 4,
2454            path_start: 40,
2455            path_end: 40,
2456            strand: Strand::Forward,
2457        };
2458        let change = PathChange {
2459            block_group_id,
2460            path: path.clone(),
2461            path_accession: None,
2462            start: 40,
2463            end: 40,
2464            block: insert,
2465            chromosome_index: 1,
2466            phased: 0,
2467            preserve_edge: true,
2468        };
2469        let tree = path.intervaltree(&conn);
2470        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2471
2472        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2473        assert_eq!(
2474            all_sequences,
2475            HashSet::from_iter(vec![
2476                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2477                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGNNNN".to_string(),
2478            ])
2479        );
2480    }
2481
2482    #[test]
2483    fn insert_at_one_bp_into_block() {
2484        let conn = get_connection(None).unwrap();
2485        let (block_group_id, path) = setup_block_group(&conn);
2486        let insert_sequence = Sequence::new()
2487            .sequence_type("DNA")
2488            .sequence("NNNN")
2489            .save(&conn);
2490        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2491        let insert = PathBlock {
2492            node_id: insert_node_id,
2493            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2494            sequence_start: 0,
2495            sequence_end: 4,
2496            path_start: 10,
2497            path_end: 11,
2498            strand: Strand::Forward,
2499        };
2500        let change = PathChange {
2501            block_group_id,
2502            path: path.clone(),
2503            path_accession: None,
2504            start: 10,
2505            end: 11,
2506            block: insert,
2507            chromosome_index: 1,
2508            phased: 0,
2509            preserve_edge: true,
2510        };
2511        let tree = path.intervaltree(&conn);
2512        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2513
2514        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2515        assert_eq!(
2516            all_sequences,
2517            HashSet::from_iter(vec![
2518                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2519                "AAAAAAAAAANNNNTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2520            ])
2521        );
2522    }
2523
2524    #[test]
2525    fn insert_at_one_bp_from_end_of_block() {
2526        let conn = get_connection(None).unwrap();
2527        let (block_group_id, path) = setup_block_group(&conn);
2528        let insert_sequence = Sequence::new()
2529            .sequence_type("DNA")
2530            .sequence("NNNN")
2531            .save(&conn);
2532        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2533        let insert = PathBlock {
2534            node_id: insert_node_id,
2535            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2536            sequence_start: 0,
2537            sequence_end: 4,
2538            path_start: 19,
2539            path_end: 20,
2540            strand: Strand::Forward,
2541        };
2542        let change = PathChange {
2543            block_group_id,
2544            path: path.clone(),
2545            path_accession: None,
2546            start: 19,
2547            end: 20,
2548            block: insert,
2549            chromosome_index: 1,
2550            phased: 0,
2551            preserve_edge: true,
2552        };
2553        let tree = path.intervaltree(&conn);
2554        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2555
2556        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2557        assert_eq!(
2558            all_sequences,
2559            HashSet::from_iter(vec![
2560                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2561                "AAAAAAAAAATTTTTTTTTNNNNCCCCCCCCCCGGGGGGGGGG".to_string(),
2562            ])
2563        );
2564    }
2565
2566    #[test]
2567    fn delete_at_beginning_of_path() {
2568        let conn = get_connection(None).unwrap();
2569        let (block_group_id, path) = setup_block_group(&conn);
2570        let deletion_sequence = Sequence::new()
2571            .sequence_type("DNA")
2572            .sequence("")
2573            .save(&conn);
2574        let deletion_node_id =
2575            Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("1"));
2576        let deletion = PathBlock {
2577            node_id: deletion_node_id,
2578            block_sequence: deletion_sequence.get_sequence(None, None),
2579            sequence_start: 0,
2580            sequence_end: 0,
2581            path_start: 0,
2582            path_end: 1,
2583            strand: Strand::Forward,
2584        };
2585        let change = PathChange {
2586            block_group_id,
2587            path: path.clone(),
2588            path_accession: None,
2589            start: 0,
2590            end: 1,
2591            block: deletion,
2592            chromosome_index: 1,
2593            phased: 0,
2594            preserve_edge: true,
2595        };
2596        let tree = path.intervaltree(&conn);
2597        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2598
2599        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2600        assert_eq!(
2601            all_sequences,
2602            HashSet::from_iter(vec![
2603                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2604                "AAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2605            ])
2606        );
2607    }
2608
2609    #[test]
2610    fn delete_at_end_of_path() {
2611        let conn = get_connection(None).unwrap();
2612        let (block_group_id, path) = setup_block_group(&conn);
2613        let deletion_sequence = Sequence::new()
2614            .sequence_type("DNA")
2615            .sequence("")
2616            .save(&conn);
2617        let deletion_node_id =
2618            Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("1"));
2619        let deletion = PathBlock {
2620            node_id: deletion_node_id,
2621            block_sequence: deletion_sequence.get_sequence(None, None),
2622            sequence_start: 0,
2623            sequence_end: 0,
2624            path_start: 35,
2625            path_end: 40,
2626            strand: Strand::Forward,
2627        };
2628        let change = PathChange {
2629            block_group_id,
2630            path: path.clone(),
2631            path_accession: None,
2632            start: 35,
2633            end: 40,
2634            block: deletion,
2635            chromosome_index: 1,
2636            phased: 0,
2637            preserve_edge: true,
2638        };
2639        let tree = path.intervaltree(&conn);
2640        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2641
2642        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2643        assert_eq!(
2644            all_sequences,
2645            HashSet::from_iter(vec![
2646                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGG".to_string(),
2647                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2648            ])
2649        );
2650    }
2651
2652    #[test]
2653    fn error_on_out_of_bounds_change() {
2654        let conn = get_connection(None).unwrap();
2655        let (block_group_id, path) = setup_block_group(&conn);
2656        let deletion_sequence = Sequence::new()
2657            .sequence_type("DNA")
2658            .sequence("")
2659            .save(&conn);
2660        let deletion_node_id =
2661            Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("1"));
2662        let deletion = PathBlock {
2663            node_id: deletion_node_id,
2664            block_sequence: deletion_sequence.get_sequence(None, None),
2665            sequence_start: 0,
2666            sequence_end: 0,
2667            path_start: 350,
2668            path_end: 400,
2669            strand: Strand::Forward,
2670        };
2671        let after_end_change = PathChange {
2672            block_group_id,
2673            path: path.clone(),
2674            path_accession: None,
2675            start: 350,
2676            end: 400,
2677            block: deletion.clone(),
2678            chromosome_index: 1,
2679            phased: 0,
2680            preserve_edge: true,
2681        };
2682        let before_start_change = PathChange {
2683            block_group_id,
2684            path: path.clone(),
2685            path_accession: None,
2686            start: -300,
2687            end: 400,
2688            block: deletion,
2689            chromosome_index: 1,
2690            phased: 0,
2691            preserve_edge: true,
2692        };
2693        let tree = path.intervaltree(&conn);
2694        let res = BlockGroup::insert_change(&conn, &after_end_change, &tree);
2695        assert!(matches!(res, Err(ChangeError::OutOfBounds(_))));
2696        let res = BlockGroup::insert_change(&conn, &before_start_change, &tree);
2697        assert!(matches!(res, Err(ChangeError::OutOfBounds(_))));
2698    }
2699
2700    #[test]
2701    fn deletion_starting_at_block_boundary() {
2702        let conn = get_connection(None).unwrap();
2703        let (block_group_id, path) = setup_block_group(&conn);
2704        let deletion_sequence = Sequence::new()
2705            .sequence_type("DNA")
2706            .sequence("")
2707            .save(&conn);
2708        let deletion_node_id =
2709            Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("1"));
2710        let deletion = PathBlock {
2711            node_id: deletion_node_id,
2712            block_sequence: deletion_sequence.get_sequence(None, None),
2713            sequence_start: 0,
2714            sequence_end: 0,
2715            path_start: 10,
2716            path_end: 12,
2717            strand: Strand::Forward,
2718        };
2719        let change = PathChange {
2720            block_group_id,
2721            path: path.clone(),
2722            path_accession: None,
2723            start: 10,
2724            end: 12,
2725            block: deletion,
2726            chromosome_index: 1,
2727            phased: 0,
2728            preserve_edge: true,
2729        };
2730        let tree = path.intervaltree(&conn);
2731        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2732
2733        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2734        assert_eq!(
2735            all_sequences,
2736            HashSet::from_iter(vec![
2737                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2738                "AAAAAAAAAATTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2739            ])
2740        );
2741    }
2742
2743    #[test]
2744    fn deletion_ending_at_block_boundary() {
2745        let conn = get_connection(None).unwrap();
2746        let (block_group_id, path) = setup_block_group(&conn);
2747        let deletion_sequence = Sequence::new()
2748            .sequence_type("DNA")
2749            .sequence("")
2750            .save(&conn);
2751        let deletion_node_id =
2752            Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("1"));
2753        let deletion = PathBlock {
2754            node_id: deletion_node_id,
2755            block_sequence: deletion_sequence.get_sequence(None, None),
2756            sequence_start: 0,
2757            sequence_end: 0,
2758            path_start: 18,
2759            path_end: 20,
2760            strand: Strand::Forward,
2761        };
2762        let change = PathChange {
2763            block_group_id,
2764            path: path.clone(),
2765            path_accession: None,
2766            start: 18,
2767            end: 20,
2768            block: deletion,
2769            chromosome_index: 1,
2770            phased: 0,
2771            preserve_edge: true,
2772        };
2773        let tree = path.intervaltree(&conn);
2774        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2775
2776        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2777        assert_eq!(
2778            all_sequences,
2779            HashSet::from_iter(vec![
2780                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2781                "AAAAAAAAAATTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2782            ])
2783        );
2784    }
2785
2786    #[test]
2787    fn test_blockgroup_interval_tree() {
2788        let conn = &get_connection(None).unwrap();
2789        let (block_group_id, path) = setup_block_group(conn);
2790        let _new_sample = Sample::get_or_create(conn, "child");
2791        let new_bg_id = get_single_bg_id(conn, "test", "child", "chr1", vec!["test".to_string()]);
2792        let new_path = Path::query(
2793            conn,
2794            "select * from paths where block_group_id = ?1",
2795            params![new_bg_id],
2796        );
2797        let insert_sequence = Sequence::new()
2798            .sequence_type("DNA")
2799            .sequence("NNNN")
2800            .save(conn);
2801        let insert_node_id = Node::create(
2802            conn,
2803            &insert_sequence.hash,
2804            &HashId::convert_str("insert-node"),
2805        );
2806        let insert = PathBlock {
2807            node_id: insert_node_id,
2808            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2809            sequence_start: 0,
2810            sequence_end: 4,
2811            path_start: 7,
2812            path_end: 15,
2813            strand: Strand::Forward,
2814        };
2815        let change = PathChange {
2816            block_group_id: new_bg_id,
2817            path: new_path[0].clone(),
2818            path_accession: None,
2819            start: 7,
2820            end: 15,
2821            block: insert,
2822            chromosome_index: 1,
2823            phased: 0,
2824            preserve_edge: true,
2825        };
2826        let tree = path.intervaltree(conn);
2827        BlockGroup::insert_change(conn, &change, &tree).unwrap();
2828
2829        let tree = BlockGroup::intervaltree_for(conn, &block_group_id, false);
2830        let tree2 = BlockGroup::intervaltree_for(conn, &block_group_id, true);
2831        interval_tree_verify(
2832            &tree,
2833            3,
2834            &[NodeIntervalBlock {
2835                node_id: HashId::convert_str("test-a-node"),
2836                start: 0,
2837                end: 10,
2838                sequence_start: 0,
2839                sequence_end: 10,
2840                strand: Strand::Forward,
2841            }],
2842        );
2843        interval_tree_verify(
2844            &tree2,
2845            3,
2846            &[NodeIntervalBlock {
2847                node_id: HashId::convert_str("test-a-node"),
2848                start: 0,
2849                end: 10,
2850                sequence_start: 0,
2851                sequence_end: 10,
2852                strand: Strand::Forward,
2853            }],
2854        );
2855        interval_tree_verify(
2856            &tree,
2857            35,
2858            &[NodeIntervalBlock {
2859                node_id: HashId::convert_str("test-g-node"),
2860                start: 30,
2861                end: 40,
2862                sequence_start: 0,
2863                sequence_end: 10,
2864                strand: Strand::Forward,
2865            }],
2866        );
2867        interval_tree_verify(
2868            &tree2,
2869            35,
2870            &[NodeIntervalBlock {
2871                node_id: HashId::convert_str("test-g-node"),
2872                start: 30,
2873                end: 40,
2874                sequence_start: 0,
2875                sequence_end: 10,
2876                strand: Strand::Forward,
2877            }],
2878        );
2879
2880        // This blockgroup has a change from positions 7-15 of 4 base pairs -- so any changes after this will be ambiguous
2881        let tree = BlockGroup::intervaltree_for(conn, &new_bg_id, false);
2882        let tree2 = BlockGroup::intervaltree_for(conn, &new_bg_id, true);
2883        interval_tree_verify(
2884            &tree,
2885            3,
2886            &[NodeIntervalBlock {
2887                node_id: HashId::convert_str("test-a-node"),
2888                start: 0,
2889                end: 7,
2890                sequence_start: 0,
2891                sequence_end: 7,
2892                strand: Strand::Forward,
2893            }],
2894        );
2895        interval_tree_verify(
2896            &tree2,
2897            3,
2898            &[NodeIntervalBlock {
2899                node_id: HashId::convert_str("test-a-node"),
2900                start: 0,
2901                end: 7,
2902                sequence_start: 0,
2903                sequence_end: 7,
2904                strand: Strand::Forward,
2905            }],
2906        );
2907        interval_tree_verify(
2908            &tree,
2909            30,
2910            &[
2911                NodeIntervalBlock {
2912                    node_id: HashId::convert_str("test-g-node"),
2913                    start: 26,
2914                    end: 36,
2915                    sequence_start: 0,
2916                    sequence_end: 10,
2917                    strand: Strand::Forward,
2918                },
2919                NodeIntervalBlock {
2920                    node_id: HashId::convert_str("test-g-node"),
2921                    start: 30,
2922                    end: 40,
2923                    sequence_start: 0,
2924                    sequence_end: 10,
2925                    strand: Strand::Forward,
2926                },
2927            ],
2928        );
2929        interval_tree_verify(&tree2, 30, &[]);
2930        // TODO: This case should return [] because there are 2 distinct nodes at this position and thus it is ambiguous.
2931        // currently, the caller needs to filter these out.
2932        interval_tree_verify(
2933            &tree2,
2934            9,
2935            &[
2936                NodeIntervalBlock {
2937                    node_id: HashId::convert_str("insert-node"),
2938                    start: 7,
2939                    end: 11,
2940                    sequence_start: 0,
2941                    sequence_end: 4,
2942                    strand: Strand::Forward,
2943                },
2944                NodeIntervalBlock {
2945                    node_id: HashId::convert_str("test-a-node"),
2946                    start: 7,
2947                    end: 10,
2948                    sequence_start: 7,
2949                    sequence_end: 10,
2950                    strand: Strand::Forward,
2951                },
2952            ],
2953        );
2954    }
2955
2956    #[test]
2957    fn test_changes_against_derivative_blockgroups() {
2958        let conn = &get_connection(None).unwrap();
2959        let (_block_group_id, _path) = setup_block_group(conn);
2960        let _new_sample = Sample::get_or_create(conn, "child");
2961        let new_bg_id = get_single_bg_id(conn, "test", "child", "chr1", vec!["test".to_string()]);
2962        let new_path = Path::query(
2963            conn,
2964            "select * from paths where block_group_id = ?1",
2965            rusqlite::params!(SQLValue::from(new_bg_id)),
2966        );
2967        let insert_sequence = Sequence::new()
2968            .sequence_type("DNA")
2969            .sequence("NNNN")
2970            .save(conn);
2971        let insert_node_id = Node::create(conn, &insert_sequence.hash, &HashId::convert_str("1"));
2972        let insert = PathBlock {
2973            node_id: insert_node_id,
2974            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2975            sequence_start: 0,
2976            sequence_end: 4,
2977            path_start: 7,
2978            path_end: 15,
2979            strand: Strand::Forward,
2980        };
2981        let change = PathChange {
2982            block_group_id: new_bg_id,
2983            path: new_path[0].clone(),
2984            path_accession: None,
2985            start: 7,
2986            end: 15,
2987            block: insert,
2988            chromosome_index: 1,
2989            phased: 0,
2990            preserve_edge: false,
2991        };
2992        // note we are making our change against the new blockgroup, and not the parent blockgroup
2993        let tree = BlockGroup::intervaltree_for(conn, &new_bg_id, true);
2994        BlockGroup::insert_change(conn, &change, &tree).unwrap();
2995        let all_sequences = BlockGroup::get_all_sequences(conn, &new_bg_id, true);
2996        assert_eq!(
2997            all_sequences,
2998            HashSet::from_iter(vec!["AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),])
2999        );
3000
3001        // Now, we make a change against another descendant
3002        let _new_sample = Sample::get_or_create(conn, "grandchild");
3003        let gc_bg_id = get_single_bg_id(
3004            conn,
3005            "test",
3006            "grandchild",
3007            "chr1",
3008            vec!["child".to_string()],
3009        );
3010        let new_path = Path::query(
3011            conn,
3012            "select * from paths where block_group_id = ?1",
3013            rusqlite::params!(SQLValue::from(gc_bg_id)),
3014        );
3015
3016        let insert = PathBlock {
3017            node_id: insert_node_id,
3018            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
3019            sequence_start: 0,
3020            sequence_end: 4,
3021            path_start: 7,
3022            path_end: 15,
3023            strand: Strand::Forward,
3024        };
3025        let change = PathChange {
3026            block_group_id: gc_bg_id,
3027            path: new_path[0].clone(),
3028            path_accession: None,
3029            start: 7,
3030            end: 15,
3031            block: insert,
3032            chromosome_index: 1,
3033            phased: 0,
3034            preserve_edge: false,
3035        };
3036        // take out an entire block.
3037        let tree = BlockGroup::intervaltree_for(conn, &gc_bg_id, true);
3038        BlockGroup::insert_change(conn, &change, &tree).unwrap();
3039        let all_sequences = BlockGroup::get_all_sequences(conn, &gc_bg_id, true);
3040        assert_eq!(
3041            all_sequences,
3042            HashSet::from_iter(vec!["AAAAAAANNNNTCCCCCCCCCCGGGGGGGGGG".to_string(),])
3043        )
3044    }
3045
3046    #[test]
3047    fn test_changes_against_derivative_diploid_blockgroups() {
3048        // This test ensures that if we have heterozygous changes that do not introduce frameshifts,
3049        // we can modify regions downstream of them.
3050        let conn = &get_connection(None).unwrap();
3051        let (_block_group_id, _path) = setup_block_group(conn);
3052        let _new_sample = Sample::get_or_create(conn, "child");
3053        let new_bg_id = get_single_bg_id(conn, "test", "child", "chr1", vec!["test".to_string()]);
3054        let new_path = Path::query(
3055            conn,
3056            "select * from paths where block_group_id = ?1",
3057            params![new_bg_id],
3058        );
3059        let insert_sequence = Sequence::new()
3060            .sequence_type("DNA")
3061            .sequence("NNNN")
3062            .save(conn);
3063        let insert_node_id = Node::create(conn, &insert_sequence.hash, &HashId::convert_str("1"));
3064        let insert = PathBlock {
3065            node_id: insert_node_id,
3066            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
3067            sequence_start: 0,
3068            sequence_end: 4,
3069            path_start: 7,
3070            path_end: 11,
3071            strand: Strand::Forward,
3072        };
3073        let change = PathChange {
3074            block_group_id: new_bg_id,
3075            path: new_path[0].clone(),
3076            path_accession: None,
3077            start: 7,
3078            end: 11,
3079            block: insert,
3080            chromosome_index: 1,
3081            phased: 0,
3082            preserve_edge: true,
3083        };
3084        // note we are making our change against the new blockgroup, and not the parent blockgroup
3085        let tree = BlockGroup::intervaltree_for(conn, &new_bg_id, true);
3086        BlockGroup::insert_change(conn, &change, &tree).unwrap();
3087        let all_sequences = BlockGroup::get_all_sequences(conn, &new_bg_id, true);
3088        assert_eq!(
3089            all_sequences,
3090            HashSet::from_iter(vec![
3091                "AAAAAAANNNNTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
3092                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
3093            ])
3094        );
3095
3096        // Now, we make a change against another descendant
3097        let _new_sample = Sample::get_or_create(conn, "grandchild");
3098        let gc_bg_id = get_single_bg_id(
3099            conn,
3100            "test",
3101            "grandchild",
3102            "chr1",
3103            vec!["child".to_string()],
3104        );
3105        let new_path = Path::query(
3106            conn,
3107            "select * from paths where block_group_id = ?1",
3108            params![gc_bg_id],
3109        );
3110
3111        let insert_sequence = Sequence::new()
3112            .sequence_type("DNA")
3113            .sequence("NNNN")
3114            .save(conn);
3115        let insert_node_id = Node::create(
3116            conn,
3117            &insert_sequence.hash,
3118            &HashId::convert_str("new-hash"),
3119        );
3120
3121        let insert = PathBlock {
3122            node_id: insert_node_id,
3123            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
3124            sequence_start: 0,
3125            sequence_end: 4,
3126            path_start: 20,
3127            path_end: 24,
3128            strand: Strand::Forward,
3129        };
3130        let change = PathChange {
3131            block_group_id: gc_bg_id,
3132            path: new_path[0].clone(),
3133            path_accession: None,
3134            start: 20,
3135            end: 24,
3136            block: insert,
3137            chromosome_index: 1,
3138            phased: 0,
3139            preserve_edge: true,
3140        };
3141        // take out an entire block.
3142        let tree = BlockGroup::intervaltree_for(conn, &gc_bg_id, true);
3143        BlockGroup::insert_change(conn, &change, &tree).unwrap();
3144        let all_sequences = BlockGroup::get_all_sequences(conn, &gc_bg_id, true);
3145        assert_eq!(
3146            all_sequences,
3147            HashSet::from_iter(vec![
3148                "AAAAAAANNNNTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
3149                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
3150                "AAAAAAAAAATTTTTTTTTTNNNNCCCCCCGGGGGGGGGG".to_string(),
3151                "AAAAAAANNNNTTTTTTTTTNNNNCCCCCCGGGGGGGGGG".to_string()
3152            ])
3153        )
3154    }
3155
3156    #[test]
3157    #[should_panic]
3158    fn test_prohibits_out_of_frame_changes_against_derivative_diploid_blockgroups() {
3159        // This test ensures that we do not allow ambiguous changes by coordinates
3160        let conn = &get_connection(None).unwrap();
3161        let (_block_group_id, _path) = setup_block_group(conn);
3162        let _new_sample = Sample::get_or_create(conn, "child");
3163        let new_bg_id = get_single_bg_id(conn, "test", "child", "chr1", vec!["test".to_string()]);
3164        let new_path = Path::query(
3165            conn,
3166            "select * from paths where block_group_id = ?1",
3167            rusqlite::params!(SQLValue::from(new_bg_id)),
3168        );
3169        // This is a heterozygous replacement of 5 bases with 4 bases, so positions
3170        // downstream of this are not addressable.
3171        let insert_sequence = Sequence::new()
3172            .sequence_type("DNA")
3173            .sequence("NNNN")
3174            .save(conn);
3175        let insert_node_id = Node::create(conn, &insert_sequence.hash, &HashId::convert_str("1"));
3176        let insert = PathBlock {
3177            node_id: insert_node_id,
3178            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
3179            sequence_start: 0,
3180            sequence_end: 4,
3181            path_start: 7,
3182            path_end: 12,
3183            strand: Strand::Forward,
3184        };
3185        let change = PathChange {
3186            block_group_id: new_bg_id,
3187            path: new_path[0].clone(),
3188            path_accession: None,
3189            start: 7,
3190            end: 12,
3191            block: insert,
3192            chromosome_index: 1,
3193            phased: 0,
3194            preserve_edge: true,
3195        };
3196        // note we are making our change against the new blockgroup, and not the parent blockgroup
3197        let tree = BlockGroup::intervaltree_for(conn, &new_bg_id, true);
3198        BlockGroup::insert_change(conn, &change, &tree).unwrap();
3199        let all_sequences = BlockGroup::get_all_sequences(conn, &new_bg_id, true);
3200        assert_eq!(
3201            all_sequences,
3202            HashSet::from_iter(vec![
3203                "AAAAAAANNNNTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
3204                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
3205            ])
3206        );
3207
3208        // Now, we make a change against another descendant and get an error
3209        let _new_sample = Sample::get_or_create(conn, "grandchild");
3210        let gc_bg_id = get_single_bg_id(
3211            conn,
3212            "test",
3213            "grandchild",
3214            "chr1",
3215            vec!["child".to_string()],
3216        );
3217        let new_path = Path::query(
3218            conn,
3219            "select * from paths where block_group_id = ?1",
3220            rusqlite::params!(SQLValue::from(gc_bg_id)),
3221        );
3222
3223        let insert_sequence = Sequence::new()
3224            .sequence_type("DNA")
3225            .sequence("NNNN")
3226            .save(conn);
3227        let insert_node_id =
3228            Node::create(conn, &insert_sequence.hash, &HashId::pad_str("new-hash"));
3229
3230        let insert = PathBlock {
3231            node_id: insert_node_id,
3232            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
3233            sequence_start: 0,
3234            sequence_end: 4,
3235            path_start: 20,
3236            path_end: 24,
3237            strand: Strand::Forward,
3238        };
3239        let change = PathChange {
3240            block_group_id: gc_bg_id,
3241            path: new_path[0].clone(),
3242            path_accession: None,
3243            start: 20,
3244            end: 24,
3245            block: insert,
3246            chromosome_index: 1,
3247            phased: 0,
3248            preserve_edge: true,
3249        };
3250        // take out an entire block.
3251        let tree = BlockGroup::intervaltree_for(conn, &gc_bg_id, true);
3252        BlockGroup::insert_change(conn, &change, &tree).unwrap();
3253    }
3254
3255    mod test_derive_subgraph {
3256
3257        use super::*;
3258        use crate::{
3259            collection::Collection,
3260            node::Node,
3261            sequence::Sequence,
3262            test_helpers::{get_connection, setup_block_group},
3263        };
3264
3265        #[test]
3266        fn test_derive_subgraph_one_insertion() {
3267            /*
3268            AAAAAAAAAA -> TTTTTTTTTT -> CCCCCCCCCC -> GGGGGGGGGG
3269                              \-> AAAAAAAA ->/
3270            Subgraph range:  |-----------------|
3271            Sequences of the subgraph are TAAAAAAAAC, TTTTTCCCCC
3272             */
3273            let conn = &get_connection(None).unwrap();
3274            Collection::create(conn, "test");
3275            let (block_group1_id, original_path) = setup_block_group(conn);
3276
3277            let intervaltree = original_path.intervaltree(conn);
3278            let insert_start_node_id = intervaltree.query_point(16).next().unwrap().value.node_id;
3279            let insert_end_node_id = intervaltree.query_point(24).next().unwrap().value.node_id;
3280
3281            let insert_sequence = Sequence::new()
3282                .sequence_type("DNA")
3283                .sequence("AAAAAAAA")
3284                .save(conn);
3285            let insert_node_id = Node::create(
3286                conn,
3287                &insert_sequence.hash,
3288                &HashId(calculate_hash(&format!(
3289                    "test-insert-a-node.{}",
3290                    insert_sequence.hash
3291                ))),
3292            );
3293            let edge_into_insert = Edge::create(
3294                conn,
3295                insert_start_node_id,
3296                6,
3297                Strand::Forward,
3298                insert_node_id,
3299                0,
3300                Strand::Forward,
3301            );
3302            let edge_out_of_insert = Edge::create(
3303                conn,
3304                insert_node_id,
3305                8,
3306                Strand::Forward,
3307                insert_end_node_id,
3308                4,
3309                Strand::Forward,
3310            );
3311            let ref_heal_1 = Edge::create(
3312                conn,
3313                insert_start_node_id,
3314                6,
3315                Strand::Forward,
3316                insert_start_node_id,
3317                6,
3318                Strand::Forward,
3319            );
3320            let ref_heal_2 = Edge::create(
3321                conn,
3322                insert_end_node_id,
3323                4,
3324                Strand::Forward,
3325                insert_end_node_id,
3326                4,
3327                Strand::Forward,
3328            );
3329
3330            let edge_ids = [
3331                &edge_into_insert.id,
3332                &edge_out_of_insert.id,
3333                &ref_heal_1.id,
3334                &ref_heal_2.id,
3335            ];
3336            let block_group_edges = edge_ids
3337                .iter()
3338                .enumerate()
3339                .map(|(i, edge_id)| BlockGroupEdgeData {
3340                    block_group_id: block_group1_id,
3341                    edge_id: *(*edge_id),
3342                    chromosome_index: if i < 2 { 1 } else { 0 },
3343                    phased: 0,
3344                })
3345                .collect::<Vec<BlockGroupEdgeData>>();
3346            BlockGroupEdge::bulk_create(conn, &block_group_edges);
3347
3348            let insert_path =
3349                original_path.new_path_with(conn, 16, 24, &edge_into_insert, &edge_out_of_insert);
3350            assert_eq!(
3351                insert_path.sequence(conn),
3352                "AAAAAAAAAATTTTTTAAAAAAAACCCCCCGGGGGGGGGG"
3353            );
3354
3355            let all_sequences = BlockGroup::get_all_sequences(conn, &block_group1_id, false);
3356            assert_eq!(
3357                all_sequences,
3358                HashSet::from_iter(vec![
3359                    "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
3360                    "AAAAAAAAAATTTTTTAAAAAAAACCCCCCGGGGGGGGGG".to_string(),
3361                ])
3362            );
3363
3364            let mut blocks = intervaltree
3365                .query(Range { start: 15, end: 25 })
3366                .map(|x| x.value)
3367                .collect::<Vec<_>>();
3368            blocks.sort_by(|a, b| a.start.cmp(&b.start));
3369            let start_block = blocks[0];
3370            let start_node_coordinate = 15 - start_block.start + start_block.sequence_start;
3371            let end_block = blocks[blocks.len() - 1];
3372            let end_node_coordinate = 25 - end_block.start + end_block.sequence_start;
3373
3374            let block_group2 = create_bg(conn, "test", "test", "chr1.1");
3375            BlockGroup::derive_subgraph(
3376                conn,
3377                "test",
3378                "test",
3379                &block_group1_id,
3380                &start_block,
3381                &end_block,
3382                start_node_coordinate,
3383                end_node_coordinate,
3384                &block_group2.id,
3385                true,
3386            );
3387            let all_sequences2 = BlockGroup::get_all_sequences(conn, &block_group2.id, false);
3388            assert_eq!(
3389                all_sequences2,
3390                HashSet::from_iter(vec!["TTTTTCCCCC".to_string(), "TAAAAAAAAC".to_string(),])
3391            );
3392        }
3393
3394        #[test]
3395        fn test_derive_subgraph_two_independent_insertions() {
3396            /*
3397            AAAAAAAAAA -> TTTTTTTTTT -> CCCCCCCCCC -----> GGGGGGGGGG
3398                                  \-> AAAAAAAA ->/  \->TTTTTTTT -/
3399            Subgraph range:     |----------------------------------|
3400             */
3401            let conn = &get_connection(None).unwrap();
3402            Collection::create(conn, "test");
3403            let (block_group1_id, original_path) = setup_block_group(conn);
3404
3405            let intervaltree = original_path.intervaltree(conn);
3406            let insert_start_node_id = intervaltree.query_point(16).next().unwrap().value.node_id;
3407            let insert_end_node_id = intervaltree.query_point(24).next().unwrap().value.node_id;
3408
3409            let insert_sequence = Sequence::new()
3410                .sequence_type("DNA")
3411                .sequence("AAAAAAAA")
3412                .save(conn);
3413            let insert_node_id = Node::create(
3414                conn,
3415                &insert_sequence.hash,
3416                &HashId(calculate_hash(&format!(
3417                    "test-insert-a-node.{}",
3418                    insert_sequence.hash
3419                ))),
3420            );
3421            let edge_into_insert = Edge::create(
3422                conn,
3423                insert_start_node_id,
3424                6,
3425                Strand::Forward,
3426                insert_node_id,
3427                0,
3428                Strand::Forward,
3429            );
3430            let edge_out_of_insert = Edge::create(
3431                conn,
3432                insert_node_id,
3433                8,
3434                Strand::Forward,
3435                insert_end_node_id,
3436                4,
3437                Strand::Forward,
3438            );
3439            let ref_heal_1 = Edge::create(
3440                conn,
3441                insert_start_node_id,
3442                6,
3443                Strand::Forward,
3444                insert_start_node_id,
3445                6,
3446                Strand::Forward,
3447            );
3448            let ref_heal_2 = Edge::create(
3449                conn,
3450                insert_end_node_id,
3451                4,
3452                Strand::Forward,
3453                insert_end_node_id,
3454                4,
3455                Strand::Forward,
3456            );
3457
3458            let edge_ids = [
3459                &edge_into_insert.id,
3460                &edge_out_of_insert.id,
3461                &ref_heal_1.id,
3462                &ref_heal_2.id,
3463            ];
3464            let block_group_edges = edge_ids
3465                .iter()
3466                .enumerate()
3467                .map(|(i, edge_id)| BlockGroupEdgeData {
3468                    block_group_id: block_group1_id,
3469                    edge_id: *(*edge_id),
3470                    chromosome_index: if i < 2 { 1 } else { 0 },
3471                    phased: 0,
3472                })
3473                .collect::<Vec<BlockGroupEdgeData>>();
3474            BlockGroupEdge::bulk_create(conn, &block_group_edges);
3475
3476            let insert_path =
3477                original_path.new_path_with(conn, 16, 24, &edge_into_insert, &edge_out_of_insert);
3478            assert_eq!(
3479                insert_path.sequence(conn),
3480                "AAAAAAAAAATTTTTTAAAAAAAACCCCCCGGGGGGGGGG"
3481            );
3482
3483            let insert2_start_node_id = intervaltree.query_point(28).next().unwrap().value.node_id;
3484            let insert2_end_node_id = intervaltree.query_point(32).next().unwrap().value.node_id;
3485
3486            let insert2_sequence = Sequence::new()
3487                .sequence_type("DNA")
3488                .sequence("TTTTTTTT")
3489                .save(conn);
3490            let insert2_node_id = Node::create(
3491                conn,
3492                &insert2_sequence.hash,
3493                &HashId(calculate_hash(&format!(
3494                    "test-insert-t-node.{}",
3495                    insert2_sequence.hash
3496                ))),
3497            );
3498            let edge_into_insert2 = Edge::create(
3499                conn,
3500                insert2_start_node_id,
3501                6,
3502                Strand::Forward,
3503                insert2_node_id,
3504                0,
3505                Strand::Forward,
3506            );
3507            let edge_out_of_insert2 = Edge::create(
3508                conn,
3509                insert2_node_id,
3510                8,
3511                Strand::Forward,
3512                insert2_end_node_id,
3513                4,
3514                Strand::Forward,
3515            );
3516            let ref_heal_1 = Edge::create(
3517                conn,
3518                insert2_start_node_id,
3519                6,
3520                Strand::Forward,
3521                insert2_start_node_id,
3522                6,
3523                Strand::Forward,
3524            );
3525            let ref_heal_2 = Edge::create(
3526                conn,
3527                insert2_end_node_id,
3528                4,
3529                Strand::Forward,
3530                insert2_end_node_id,
3531                4,
3532                Strand::Forward,
3533            );
3534
3535            let edge_ids = [
3536                &edge_into_insert2.id,
3537                &edge_out_of_insert2.id,
3538                &ref_heal_1.id,
3539                &ref_heal_2.id,
3540            ];
3541            let block_group_edges = edge_ids
3542                .iter()
3543                .enumerate()
3544                .map(|(i, edge_id)| BlockGroupEdgeData {
3545                    block_group_id: block_group1_id,
3546                    edge_id: *(*edge_id),
3547                    chromosome_index: if i < 2 { 1 } else { 0 },
3548                    phased: 0,
3549                })
3550                .collect::<Vec<BlockGroupEdgeData>>();
3551            BlockGroupEdge::bulk_create(conn, &block_group_edges);
3552
3553            let insert2_path =
3554                insert_path.new_path_with(conn, 28, 32, &edge_into_insert2, &edge_out_of_insert2);
3555            assert_eq!(
3556                insert2_path.sequence(conn),
3557                "AAAAAAAAAATTTTTTAAAAAAAACCTTTTTTTTGGGGGG"
3558            );
3559
3560            let all_sequences = BlockGroup::get_all_sequences(conn, &block_group1_id, false);
3561            assert_eq!(
3562                all_sequences,
3563                HashSet::from_iter(vec![
3564                    "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
3565                    "AAAAAAAAAATTTTTTAAAAAAAACCCCCCGGGGGGGGGG".to_string(),
3566                    "AAAAAAAAAATTTTTTTTTTCCCCCCTTTTTTTTGGGGGG".to_string(),
3567                    "AAAAAAAAAATTTTTTAAAAAAAACCTTTTTTTTGGGGGG".to_string(),
3568                ])
3569            );
3570
3571            let mut blocks = intervaltree
3572                .query(Range { start: 15, end: 36 })
3573                .map(|x| x.value)
3574                .collect::<Vec<_>>();
3575            blocks.sort_by(|a, b| a.start.cmp(&b.start));
3576            let start_block = blocks[0];
3577            let start_node_coordinate = 15 - start_block.start + start_block.sequence_start;
3578            let end_block = blocks[blocks.len() - 1];
3579            let end_node_coordinate = 36 - end_block.start + end_block.sequence_start;
3580
3581            let block_group2 = create_bg(conn, "test", "test", "chr1.1");
3582            BlockGroup::derive_subgraph(
3583                conn,
3584                "test",
3585                "test",
3586                &block_group1_id,
3587                &start_block,
3588                &end_block,
3589                start_node_coordinate,
3590                end_node_coordinate,
3591                &block_group2.id,
3592                true,
3593            );
3594            let all_sequences2 = BlockGroup::get_all_sequences(conn, &block_group2.id, false);
3595            assert_eq!(
3596                all_sequences2,
3597                HashSet::from_iter(vec![
3598                    "TTTTTCCCCCCCCCCGGGGGG".to_string(),
3599                    "TAAAAAAAACCCCCCGGGGGG".to_string(),
3600                    "TTTTTCCCCCCTTTTTTTTGG".to_string(),
3601                    "TAAAAAAAACCTTTTTTTTGG".to_string(),
3602                ])
3603            );
3604        }
3605
3606        #[test]
3607        fn test_derive_subgraph_two_independent_insertions_and_one_deletion() {
3608            /*
3609                       /--------------------------------------------\  (<-- Deletion edge)
3610            AAAAAAAAAA -> TTTTTTTTTT -> CCCCCCCCCC -----> GGGGGGGGGG
3611                                  \-> AAAAAAAA ->/  \->TTTTTTTT -/
3612            Subgraph range: |----------------------------------|
3613
3614            Confirms that deletion edge is ignored and not added to subgraph
3615             */
3616            let conn = &get_connection(None).unwrap();
3617            Collection::create(conn, "test");
3618            let (block_group1_id, original_path) = setup_block_group(conn);
3619
3620            let intervaltree = original_path.intervaltree(conn);
3621            let insert_start_node_id = intervaltree.query_point(16).next().unwrap().value.node_id;
3622            let insert_end_node_id = intervaltree.query_point(24).next().unwrap().value.node_id;
3623
3624            let insert_sequence = Sequence::new()
3625                .sequence_type("DNA")
3626                .sequence("AAAAAAAA")
3627                .save(conn);
3628            let insert_node_id = Node::create(
3629                conn,
3630                &insert_sequence.hash,
3631                &HashId(calculate_hash(&format!(
3632                    "test-insert-a-node.{}",
3633                    insert_sequence.hash
3634                ))),
3635            );
3636            let edge_into_insert = Edge::create(
3637                conn,
3638                insert_start_node_id,
3639                6,
3640                Strand::Forward,
3641                insert_node_id,
3642                0,
3643                Strand::Forward,
3644            );
3645            let edge_out_of_insert = Edge::create(
3646                conn,
3647                insert_node_id,
3648                8,
3649                Strand::Forward,
3650                insert_end_node_id,
3651                4,
3652                Strand::Forward,
3653            );
3654            let ref_heal_1 = Edge::create(
3655                conn,
3656                insert_start_node_id,
3657                6,
3658                Strand::Forward,
3659                insert_start_node_id,
3660                6,
3661                Strand::Forward,
3662            );
3663            let ref_heal_2 = Edge::create(
3664                conn,
3665                insert_end_node_id,
3666                4,
3667                Strand::Forward,
3668                insert_end_node_id,
3669                4,
3670                Strand::Forward,
3671            );
3672
3673            let edge_ids = [
3674                &edge_into_insert.id,
3675                &edge_out_of_insert.id,
3676                &ref_heal_1.id,
3677                &ref_heal_2.id,
3678            ];
3679            let block_group_edges = edge_ids
3680                .iter()
3681                .map(|edge_id| BlockGroupEdgeData {
3682                    block_group_id: block_group1_id,
3683                    edge_id: *(*edge_id),
3684                    chromosome_index: NO_CHROMOSOME_INDEX,
3685                    phased: 0,
3686                })
3687                .collect::<Vec<BlockGroupEdgeData>>();
3688            BlockGroupEdge::bulk_create(conn, &block_group_edges);
3689
3690            let insert_path =
3691                original_path.new_path_with(conn, 16, 24, &edge_into_insert, &edge_out_of_insert);
3692            assert_eq!(
3693                insert_path.sequence(conn),
3694                "AAAAAAAAAATTTTTTAAAAAAAACCCCCCGGGGGGGGGG"
3695            );
3696
3697            let insert2_start_node_id = intervaltree.query_point(28).next().unwrap().value.node_id;
3698            let insert2_end_node_id = intervaltree.query_point(32).next().unwrap().value.node_id;
3699
3700            let insert2_sequence = Sequence::new()
3701                .sequence_type("DNA")
3702                .sequence("TTTTTTTT")
3703                .save(conn);
3704            let insert2_node_id = Node::create(
3705                conn,
3706                &insert2_sequence.hash,
3707                &HashId(calculate_hash(&format!(
3708                    "test-insert-t-node.{}",
3709                    insert2_sequence.hash
3710                ))),
3711            );
3712            let edge_into_insert2 = Edge::create(
3713                conn,
3714                insert2_start_node_id,
3715                6,
3716                Strand::Forward,
3717                insert2_node_id,
3718                0,
3719                Strand::Forward,
3720            );
3721            let edge_out_of_insert2 = Edge::create(
3722                conn,
3723                insert2_node_id,
3724                8,
3725                Strand::Forward,
3726                insert2_end_node_id,
3727                4,
3728                Strand::Forward,
3729            );
3730            let ref_heal_1 = Edge::create(
3731                conn,
3732                insert2_start_node_id,
3733                6,
3734                Strand::Forward,
3735                insert2_start_node_id,
3736                6,
3737                Strand::Forward,
3738            );
3739            let ref_heal_2 = Edge::create(
3740                conn,
3741                insert2_end_node_id,
3742                4,
3743                Strand::Forward,
3744                insert2_end_node_id,
3745                4,
3746                Strand::Forward,
3747            );
3748
3749            let edge_ids = [
3750                &edge_into_insert2.id,
3751                &edge_out_of_insert2.id,
3752                &ref_heal_1.id,
3753                &ref_heal_2.id,
3754            ];
3755            let block_group_edges = edge_ids
3756                .iter()
3757                .map(|edge_id| BlockGroupEdgeData {
3758                    block_group_id: block_group1_id,
3759                    edge_id: *(*edge_id),
3760                    chromosome_index: NO_CHROMOSOME_INDEX,
3761                    phased: 0,
3762                })
3763                .collect::<Vec<BlockGroupEdgeData>>();
3764            BlockGroupEdge::bulk_create(conn, &block_group_edges);
3765
3766            let insert2_path =
3767                insert_path.new_path_with(conn, 28, 32, &edge_into_insert2, &edge_out_of_insert2);
3768            assert_eq!(
3769                insert2_path.sequence(conn),
3770                "AAAAAAAAAATTTTTTAAAAAAAACCTTTTTTTTGGGGGG"
3771            );
3772
3773            let deletion_end_node_id = intervaltree.query_point(38).next().unwrap().value.node_id;
3774            let deletion_edge = Edge::create(
3775                conn,
3776                insert_node_id,
3777                8,
3778                Strand::Forward,
3779                deletion_end_node_id,
3780                8,
3781                Strand::Forward,
3782            );
3783            let ref_heal_1 = Edge::create(
3784                conn,
3785                insert_node_id,
3786                8,
3787                Strand::Forward,
3788                insert_node_id,
3789                8,
3790                Strand::Forward,
3791            );
3792            let ref_heal_2 = Edge::create(
3793                conn,
3794                deletion_end_node_id,
3795                8,
3796                Strand::Forward,
3797                deletion_end_node_id,
3798                8,
3799                Strand::Forward,
3800            );
3801            let block_group_edges = [
3802                BlockGroupEdgeData {
3803                    block_group_id: block_group1_id,
3804                    edge_id: deletion_edge.id,
3805                    chromosome_index: NO_CHROMOSOME_INDEX,
3806                    phased: 0,
3807                },
3808                BlockGroupEdgeData {
3809                    block_group_id: block_group1_id,
3810                    edge_id: ref_heal_1.id,
3811                    chromosome_index: NO_CHROMOSOME_INDEX,
3812                    phased: 0,
3813                },
3814                BlockGroupEdgeData {
3815                    block_group_id: block_group1_id,
3816                    edge_id: ref_heal_2.id,
3817                    chromosome_index: NO_CHROMOSOME_INDEX,
3818                    phased: 0,
3819                },
3820            ];
3821            BlockGroupEdge::bulk_create(conn, &block_group_edges);
3822
3823            let all_sequences = BlockGroup::get_all_sequences(conn, &block_group1_id, false);
3824            assert_eq!(
3825                all_sequences,
3826                HashSet::from_iter(vec![
3827                    "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
3828                    "AAAAAAAAAATTTTTTAAAAAAAACCCCCCGGGGGGGGGG".to_string(),
3829                    "AAAAAAAAAATTTTTTTTTTCCCCCCTTTTTTTTGGGGGG".to_string(),
3830                    "AAAAAAAAAATTTTTTAAAAAAAACCTTTTTTTTGGGGGG".to_string(),
3831                    "AAAAAAAAAATTTTTTAAAAAAAAGG".to_string(), // Sequence including deletion
3832                ])
3833            );
3834
3835            let mut blocks = intervaltree
3836                .query(Range { start: 15, end: 36 })
3837                .map(|x| x.value)
3838                .collect::<Vec<_>>();
3839            blocks.sort_by(|a, b| a.start.cmp(&b.start));
3840            let start_block = blocks[0];
3841            let start_node_coordinate = 15 - start_block.start + start_block.sequence_start;
3842            let end_block = blocks[blocks.len() - 1];
3843            let end_node_coordinate = 36 - end_block.start + end_block.sequence_start;
3844
3845            let block_group2 = create_bg(conn, "test", "test", "chr1.1");
3846            BlockGroup::derive_subgraph(
3847                conn,
3848                "test",
3849                "test",
3850                &block_group1_id,
3851                &start_block,
3852                &end_block,
3853                start_node_coordinate,
3854                end_node_coordinate,
3855                &block_group2.id,
3856                true,
3857            );
3858            let all_sequences2 = BlockGroup::get_all_sequences(conn, &block_group2.id, false);
3859            assert_eq!(
3860                all_sequences2,
3861                // The deletion is not included in the cloned subgraph since one end of it is
3862                // outside the specified range
3863                HashSet::from_iter(vec![
3864                    "TTTTTCCCCCCCCCCGGGGGG".to_string(),
3865                    "TAAAAAAAACCCCCCGGGGGG".to_string(),
3866                    "TTTTTCCCCCCTTTTTTTTGG".to_string(),
3867                    "TAAAAAAAACCTTTTTTTTGG".to_string(),
3868                ])
3869            );
3870        }
3871    }
3872}