Skip to main content

gen_models/
block_group.rs

1use std::{
2    collections::{HashMap, HashSet},
3    hash::Hash,
4    rc::Rc,
5};
6
7use gen_core::{
8    HashId, NO_CHROMOSOME_INDEX, NodeIntervalBlock, PATH_END_NODE_ID, PATH_START_NODE_ID,
9    PRESERVE_EDIT_SITE_CHROMOSOME_INDEX, PathBlock, Strand, calculate_hash, is_end_node,
10    is_start_node, is_terminal, traits::Capnp,
11};
12use gen_graph::{
13    GenGraph, GraphNode, all_intermediate_edges, all_reachable_nodes, all_simple_paths,
14    flatten_to_interval_tree,
15};
16use intervaltree::IntervalTree;
17use rusqlite::{Row, params, types::Value as SQLValue};
18use serde::{Deserialize, Serialize};
19
20use crate::{
21    accession::{Accession, AccessionEdge, AccessionEdgeData, AccessionPath},
22    block_group_edge::{AugmentedEdgeData, BlockGroupEdge, BlockGroupEdgeData},
23    db::GraphConnection,
24    edge::{Edge, EdgeData, GroupBlock},
25    errors::{ChangeError, QueryError},
26    gen_models_capnp::block_group,
27    node::Node,
28    path::{Path, PathData},
29    path_edge::PathEdge,
30    traits::*,
31};
32
33#[derive(Clone, Debug, Deserialize, Eq, Hash, Serialize, PartialEq)]
34pub struct BlockGroup {
35    pub id: HashId,
36    pub collection_name: String,
37    pub sample_name: Option<String>,
38    pub name: String,
39    pub created_on: i64,
40}
41
42impl<'a> Capnp<'a> for BlockGroup {
43    type Builder = block_group::Builder<'a>;
44    type Reader = block_group::Reader<'a>;
45
46    fn write_capnp(&self, builder: &mut Self::Builder) {
47        builder.set_id(&self.id.0).unwrap();
48        builder.set_collection_name(&self.collection_name);
49        match &self.sample_name {
50            None => {
51                builder.reborrow().get_sample_name().set_none(());
52            }
53            Some(n) => {
54                builder.reborrow().get_sample_name().set_some(n.clone());
55            }
56        }
57        builder.set_name(&self.name);
58        builder.set_created_on(self.created_on);
59    }
60
61    fn read_capnp(reader: Self::Reader) -> Self {
62        let id = reader
63            .get_id()
64            .unwrap()
65            .as_slice()
66            .unwrap()
67            .try_into()
68            .unwrap();
69        let collection_name = reader.get_collection_name().unwrap().to_string().unwrap();
70        let sample_name: Option<String> = match reader.get_sample_name().which().unwrap() {
71            block_group::sample_name::None(()) => None,
72            block_group::sample_name::Some(n) => Some(n.unwrap().to_string().unwrap()),
73        };
74        let name = reader.get_name().unwrap().to_string().unwrap();
75        let created_on = reader.get_created_on();
76
77        BlockGroup {
78            id,
79            collection_name,
80            sample_name,
81            name,
82            created_on,
83        }
84    }
85}
86
87#[derive(Clone, Debug, Eq, Hash, PartialEq)]
88pub struct BlockGroupData<'a> {
89    pub collection_name: &'a str,
90    pub sample_name: Option<&'a str>,
91    pub name: String,
92}
93
94#[derive(Clone, Debug)]
95pub struct PathChange {
96    pub block_group_id: HashId,
97    pub path: Path,
98    pub path_accession: Option<String>,
99    pub start: i64,
100    pub end: i64,
101    pub block: PathBlock,
102    pub chromosome_index: i64,
103    pub phased: i64,
104    pub preserve_edge: bool,
105}
106
107pub struct PathCache<'a> {
108    pub cache: HashMap<PathData, Path>,
109    pub intervaltree_cache: HashMap<Path, IntervalTree<i64, NodeIntervalBlock>>,
110    pub conn: &'a GraphConnection,
111}
112
113impl<'a> PathCache<'a> {
114    pub fn new(conn: &'a GraphConnection) -> PathCache<'a> {
115        PathCache {
116            cache: HashMap::<PathData, Path>::new(),
117            intervaltree_cache: HashMap::<Path, IntervalTree<i64, NodeIntervalBlock>>::new(),
118            conn,
119        }
120    }
121
122    pub fn lookup(path_cache: &mut PathCache, block_group_id: &HashId, name: String) -> Path {
123        let path_key = PathData {
124            name: name.clone(),
125            block_group_id: *block_group_id,
126        };
127        let path_lookup = path_cache.cache.get(&path_key);
128        if let Some(path) = path_lookup {
129            path.clone()
130        } else {
131            let conn = path_cache.conn;
132            let new_path = Path::query(
133                conn,
134                "select * from paths where block_group_id = ?1 AND name = ?2",
135                params![block_group_id, name],
136            )[0]
137            .clone();
138
139            path_cache.cache.insert(path_key, new_path.clone());
140            let tree = new_path.intervaltree(conn);
141            path_cache.intervaltree_cache.insert(new_path.clone(), tree);
142            new_path
143        }
144    }
145
146    pub fn get_intervaltree<'b>(
147        path_cache: &'b PathCache<'_>,
148        path: &Path,
149    ) -> Option<&'b IntervalTree<i64, NodeIntervalBlock>> {
150        path_cache.intervaltree_cache.get(path)
151    }
152}
153
154impl BlockGroup {
155    pub fn create(
156        conn: &GraphConnection,
157        collection_name: &str,
158        sample_name: Option<&str>,
159        name: &str,
160    ) -> BlockGroup {
161        let hash = HashId(calculate_hash(&format!(
162            "{collection_name}:{sample_name:?}:{name}"
163        )));
164        let timestamp = chrono::Utc::now().timestamp_nanos_opt().unwrap();
165        let query = "INSERT INTO block_groups (id, collection_name, sample_name, name, created_on) VALUES (?1, ?2, ?3, ?4, ?5);";
166        let mut stmt = conn.prepare(query).unwrap();
167        let bg = BlockGroup {
168            id: hash,
169            collection_name: collection_name.to_string(),
170            sample_name: sample_name.map(|s| s.to_string()),
171            name: name.to_string(),
172            created_on: timestamp,
173        };
174        match stmt.execute((hash, collection_name, sample_name, name, timestamp)) {
175            Ok(_) => bg,
176            Err(rusqlite::Error::SqliteFailure(err, _details)) => {
177                if err.code == rusqlite::ErrorCode::ConstraintViolation {
178                    bg
179                } else {
180                    panic!("something bad happened querying the database")
181                }
182            }
183            Err(_) => {
184                panic!("something bad happened querying the database")
185            }
186        }
187    }
188
189    pub fn delete(
190        conn: &GraphConnection,
191        collection_name: &str,
192        sample_name: Option<&str>,
193        name: &str,
194    ) {
195        if let Some(n) = sample_name {
196            let query = "delete from block_groups where collection_name = ?1 and sample_name = ?2 and name = ?3;";
197            conn.execute(
198                query,
199                params![collection_name.to_string(), n.to_string(), name.to_string()],
200            )
201            .unwrap();
202        } else {
203            let query = "delete from block_groups where collection_name = ?1 and sample_name is null and name = ?2;";
204            conn.execute(
205                query,
206                params![collection_name.to_string(), name.to_string()],
207            )
208            .unwrap();
209        }
210    }
211
212    pub fn get_by_id(conn: &GraphConnection, id: &HashId) -> BlockGroup {
213        let query = "SELECT * FROM block_groups WHERE id = ?1";
214        let mut stmt = conn.prepare(query).unwrap();
215        match stmt.query_row([id], |row| Ok(Self::process_row(row))) {
216            Ok(res) => res,
217            Err(rusqlite::Error::QueryReturnedNoRows) => panic!("No block group with id {id}"),
218            Err(_) => {
219                panic!("something bad happened querying the database")
220            }
221        }
222    }
223
224    pub fn duplicate(&self, conn: &GraphConnection, target_block_group_id: &HashId) {
225        let existing_paths = Path::query(
226            conn,
227            "SELECT * from paths where block_group_id = ?1;",
228            params![self.id],
229        );
230
231        let augmented_edges = BlockGroupEdge::edges_for_block_group(conn, &self.id);
232        let edge_ids = augmented_edges
233            .iter()
234            .map(|edge| edge.edge.id)
235            .collect::<Vec<_>>();
236        let new_block_group_edges = edge_ids
237            .iter()
238            .enumerate()
239            .map(|(i, edge_id)| BlockGroupEdgeData {
240                block_group_id: *target_block_group_id,
241                edge_id: *edge_id,
242                chromosome_index: augmented_edges[i].chromosome_index,
243                phased: augmented_edges[i].phased,
244            })
245            .collect::<Vec<_>>();
246        BlockGroupEdge::bulk_create(conn, &new_block_group_edges);
247
248        let mut path_map = HashMap::new();
249
250        for path in existing_paths.iter() {
251            let edge_ids = PathEdge::edges_for_path(conn, &path.id)
252                .into_iter()
253                .map(|edge| edge.id)
254                .collect::<Vec<_>>();
255            let new_path = Path::create(conn, &path.name, target_block_group_id, &edge_ids);
256            path_map.insert(&path.id, new_path.id);
257        }
258
259        for accession in Accession::query(
260            conn,
261            "select * from accessions where path_id IN rarray(?1);",
262            rusqlite::params!(Rc::new(
263                existing_paths
264                    .iter()
265                    .map(|path| SQLValue::from(path.id))
266                    .collect::<Vec<SQLValue>>()
267            )),
268        ) {
269            let edges = AccessionPath::query(
270                conn,
271                "Select * from accession_paths where accession_id = ?1 order by index_in_path ASC;",
272                rusqlite::params!(SQLValue::from(accession.id)),
273            );
274            let new_path_id = &path_map[&accession.path_id];
275            let obj = Accession::create(
276                conn,
277                &accession.name,
278                new_path_id,
279                accession.parent_accession_id.as_ref(),
280            )
281            .expect("Unable to create accession in clone.");
282            AccessionPath::create(
283                conn,
284                &obj.id,
285                &edges.iter().map(|ap| ap.edge_id).collect::<Vec<_>>(),
286            );
287        }
288    }
289
290    pub fn get_or_create_sample_block_group(
291        conn: &GraphConnection,
292        collection_name: &str,
293        sample_name: &str,
294        group_name: &str,
295        parent_sample: Option<&str>,
296    ) -> Result<HashId, QueryError> {
297        let binding = BlockGroup::query(
298            conn,
299            "select * from block_groups where collection_name = ?1 AND sample_name = ?2 AND name = ?3",
300            params![collection_name, sample_name, group_name],
301        );
302        let initial_query_result = binding.first();
303
304        if let Some(block_group) = initial_query_result {
305            return Ok(block_group.id);
306        }
307
308        let block_groups = if let Some(parent_sample_name) = parent_sample {
309            // use the base reference group if it exists
310            BlockGroup::query(
311                conn,
312                "select * from block_groups where collection_name = ?1 AND sample_name = ?2 AND name = ?3",
313                params![collection_name, parent_sample_name, group_name],
314            )
315        } else {
316            BlockGroup::query(
317                conn,
318                "select * from block_groups where collection_name = ?1 AND sample_name IS null AND name = ?2",
319                params![collection_name, group_name],
320            )
321        };
322
323        if let Some(parent_block_group) = block_groups.first() {
324            let new_block_group =
325                BlockGroup::create(conn, collection_name, Some(sample_name), group_name);
326
327            // clone parent blocks/edges/path
328            parent_block_group.duplicate(conn, &new_block_group.id);
329
330            Ok(new_block_group.id)
331        } else {
332            let error_message = if let Some(parent_sample_name) = parent_sample {
333                format!(
334                    "Block group not found for either new sample ({sample_name}) or parent sample ({parent_sample_name})"
335                )
336            } else {
337                format!(
338                    "Block group not found for new sample ({sample_name}) and default parent sample"
339                )
340            };
341            Err(QueryError::ResultsNotFound(error_message))
342        }
343    }
344
345    pub fn get_id(collection_name: &str, sample_name: Option<&str>, group_name: &str) -> HashId {
346        HashId(calculate_hash(&format!(
347            "{collection_name}:{sample_name:?}:{group_name}"
348        )))
349    }
350
351    pub fn get_graph(conn: &GraphConnection, block_group_id: &HashId) -> GenGraph {
352        let edges = BlockGroupEdge::edges_for_block_group(conn, block_group_id);
353        let blocks = Edge::blocks_from_edges(conn, &edges);
354        let (graph, _) = Edge::build_graph(&edges, &blocks);
355        graph
356    }
357
358    pub fn prune_graph(graph: &mut GenGraph) {
359        // Prunes a graph by removing edges on the same chromosome_index. This means if 2 edges are
360        // both "chromosome index 0", we keep the newer one.
361        let mut root_nodes = HashSet::new();
362        let mut edges_to_remove: Vec<(GraphNode, GraphNode)> = vec![];
363        for node in graph.nodes() {
364            if node.node_id == PATH_START_NODE_ID {
365                root_nodes.insert(node);
366            }
367            let mut edges_by_ci: HashMap<i64, (GraphNode, GraphNode, i64)> = HashMap::new();
368            for (source_node, target_node, edge_weights) in graph.edges(node) {
369                for edge_weight in edge_weights {
370                    if edge_weight.chromosome_index == NO_CHROMOSOME_INDEX {
371                        continue;
372                    }
373                    if edge_weight.chromosome_index == PRESERVE_EDIT_SITE_CHROMOSOME_INDEX {
374                        edges_to_remove.push((source_node, target_node));
375                        continue;
376                    }
377                    edges_by_ci
378                        .entry(edge_weight.chromosome_index)
379                        .and_modify(|(source, target, created_on)| {
380                            if edge_weight.created_on > *created_on {
381                                edges_to_remove.push((*source, *target));
382                                *source = source_node;
383                                *target = target_node;
384                                *created_on = edge_weight.created_on;
385                            } else {
386                                edges_to_remove.push((source_node, target_node));
387                            }
388                        })
389                        .or_insert((source_node, target_node, edge_weight.created_on));
390                }
391            }
392        }
393
394        for (source, target) in edges_to_remove.iter() {
395            graph.remove_edge(*source, *target);
396        }
397
398        let reachable_nodes = all_reachable_nodes(&*graph, &Vec::from_iter(root_nodes));
399        let mut to_remove = vec![];
400        for node in graph.nodes() {
401            if !reachable_nodes.contains(&node) {
402                to_remove.push(node);
403            }
404        }
405        for node in to_remove {
406            graph.remove_node(node);
407        }
408    }
409
410    pub fn get_all_sequences(
411        conn: &GraphConnection,
412        block_group_id: &HashId,
413        _prune: bool,
414    ) -> HashSet<String> {
415        let edges = BlockGroupEdge::edges_for_block_group(conn, block_group_id)
416            .into_iter()
417            .filter(|edge| edge.chromosome_index != PRESERVE_EDIT_SITE_CHROMOSOME_INDEX)
418            .collect::<Vec<_>>();
419        let blocks = Edge::blocks_from_edges(conn, &edges);
420
421        let (mut graph, _) = Edge::build_graph(&edges, &blocks);
422        BlockGroup::prune_graph(&mut graph);
423
424        let mut start_nodes = vec![];
425        let mut end_nodes = vec![];
426        for node in graph.nodes() {
427            if is_start_node(node.node_id) {
428                start_nodes.push(node);
429            } else if is_end_node(node.node_id) {
430                end_nodes.push(node);
431            }
432        }
433        let blocks_by_id = blocks
434            .clone()
435            .into_iter()
436            .map(|block| (block.id, block))
437            .collect::<HashMap<i64, GroupBlock>>();
438        let mut sequences = HashSet::<String>::new();
439
440        for start_node in start_nodes {
441            for end_node in &end_nodes {
442                // TODO: maybe make all_simple_paths return a single path id where start == end
443                if start_node == *end_node {
444                    let block = blocks_by_id.get(&start_node.block_id).unwrap();
445                    if block.node_id != PATH_START_NODE_ID && block.node_id != PATH_END_NODE_ID {
446                        sequences.insert(block.sequence());
447                    }
448                } else {
449                    for path in all_simple_paths(&graph, start_node, *end_node) {
450                        let mut current_sequence = "".to_string();
451                        for node in path {
452                            let block = blocks_by_id.get(&node.block_id).unwrap();
453                            let block_sequence = block.sequence();
454                            current_sequence.push_str(&block_sequence);
455                        }
456                        sequences.insert(current_sequence);
457                    }
458                }
459            }
460        }
461
462        sequences
463    }
464
465    pub fn add_accession(
466        conn: &GraphConnection,
467        path: &Path,
468        name: &str,
469        start: i64,
470        end: i64,
471        cache: &mut PathCache,
472    ) -> Accession {
473        let tree = PathCache::get_intervaltree(cache, path).unwrap();
474        let start_blocks: Vec<&NodeIntervalBlock> =
475            tree.query_point(start).map(|x| &x.value).collect();
476        assert_eq!(start_blocks.len(), 1);
477        let start_block = start_blocks[0];
478        let end_blocks: Vec<&NodeIntervalBlock> = tree.query_point(end).map(|x| &x.value).collect();
479        assert_eq!(end_blocks.len(), 1);
480        let end_block = end_blocks[0];
481        // we make a start/end edge for the accession start/end, then fill in the middle
482        // with any existing edges
483        let start_edge = AccessionEdgeData {
484            source_node_id: PATH_START_NODE_ID,
485            source_coordinate: -1,
486            source_strand: Strand::Forward,
487            target_node_id: start_block.node_id,
488            target_coordinate: start - start_block.start + start_block.sequence_start,
489            target_strand: Strand::Forward,
490            chromosome_index: 0,
491        };
492        let end_edge = AccessionEdgeData {
493            source_node_id: end_block.node_id,
494            source_coordinate: end - end_block.start + end_block.sequence_start,
495            source_strand: Strand::Forward,
496            target_node_id: PATH_END_NODE_ID,
497            target_coordinate: -1,
498            target_strand: Strand::Forward,
499            chromosome_index: 0,
500        };
501        let accession =
502            Accession::create(conn, name, &path.id, None).expect("Unable to create accession.");
503        let mut path_edges = vec![start_edge];
504        if start_block == end_block {
505            path_edges.push(end_edge);
506        } else {
507            let mut in_range = false;
508            let path_blocks: Vec<&NodeIntervalBlock> = tree
509                .iter_sorted()
510                .map(|x| &x.value)
511                .filter(|block| {
512                    if block.block_id == start_block.block_id {
513                        in_range = true;
514                    } else if block.block_id == end_block.block_id {
515                        in_range = false;
516                        return true;
517                    }
518                    in_range
519                })
520                .collect::<Vec<_>>();
521            // if start and end block are not the same, we will always have at least 2 elements in path_blocks
522            for (block, next_block) in path_blocks.iter().zip(path_blocks[1..].iter()) {
523                path_edges.push(AccessionEdgeData {
524                    source_node_id: block.node_id,
525                    source_coordinate: block.sequence_end,
526                    source_strand: block.strand,
527                    target_node_id: next_block.node_id,
528                    target_coordinate: next_block.sequence_start,
529                    target_strand: next_block.strand,
530                    chromosome_index: 0,
531                })
532            }
533            path_edges.push(end_edge);
534        }
535        AccessionPath::create(
536            conn,
537            &accession.id,
538            &AccessionEdge::bulk_create(conn, &path_edges),
539        );
540        accession
541    }
542
543    pub fn insert_changes(
544        conn: &GraphConnection,
545        changes: &[PathChange],
546        cache: &mut PathCache,
547        modify_blockgroup: bool,
548    ) -> Result<(), ChangeError> {
549        let mut new_augmented_edges_by_block_group =
550            HashMap::<&HashId, Vec<AugmentedEdgeData>>::new();
551        let mut new_accession_edges = HashMap::new();
552        let mut tree_map = HashMap::new();
553        for change in changes {
554            let tree = if modify_blockgroup {
555                tree_map.entry(change.block_group_id).or_insert_with(|| {
556                    BlockGroup::intervaltree_for(conn, &change.block_group_id, true)
557                })
558            } else {
559                PathCache::get_intervaltree(cache, &change.path).unwrap()
560            };
561            let new_augmented_edges = BlockGroup::set_up_new_edges(change, tree)?;
562            new_augmented_edges_by_block_group
563                .entry(&change.block_group_id)
564                .and_modify(|new_edge_data| new_edge_data.extend(new_augmented_edges.clone()))
565                .or_insert_with(|| new_augmented_edges.clone());
566            if let Some(accession) = &change.path_accession {
567                new_accession_edges
568                    .entry((&change.path, accession))
569                    .and_modify(|new_edge_data: &mut Vec<AugmentedEdgeData>| {
570                        new_edge_data.extend(new_augmented_edges.clone())
571                    })
572                    .or_insert_with(|| new_augmented_edges.clone());
573            }
574        }
575
576        let mut edge_data_map = HashMap::new();
577
578        for (block_group_id, new_augmented_edges) in new_augmented_edges_by_block_group {
579            let new_edges = new_augmented_edges
580                .iter()
581                .map(|augmented_edge| augmented_edge.edge_data)
582                .collect::<Vec<_>>();
583            let edge_ids = Edge::bulk_create(conn, &new_edges);
584            for (i, edge_data) in new_edges.iter().enumerate() {
585                edge_data_map.insert(*edge_data, edge_ids[i]);
586            }
587            let new_block_group_edges = edge_ids
588                .iter()
589                .enumerate()
590                .map(|(i, edge_id)| BlockGroupEdgeData {
591                    block_group_id: *block_group_id,
592                    edge_id: *edge_id,
593                    chromosome_index: new_augmented_edges[i].chromosome_index,
594                    phased: new_augmented_edges[i].phased,
595                })
596                .collect::<Vec<_>>();
597            BlockGroupEdge::bulk_create(conn, &new_block_group_edges);
598        }
599
600        for ((path, accession_name), path_edges) in new_accession_edges {
601            match Accession::get(
602                conn,
603                "select * from accessions where name = ?1 AND path_id = ?2",
604                params![accession_name, path.id],
605            ) {
606                Ok(_) => {
607                    println!(
608                        "accession already exists, consider a better matching algorithm to determine if this is an error."
609                    );
610                }
611                Err(_) => {
612                    let acc_edges = AccessionEdge::bulk_create(
613                        conn,
614                        &path_edges
615                            .iter()
616                            .map(AccessionEdgeData::from)
617                            .collect::<Vec<_>>(),
618                    );
619                    let acc = Accession::create(conn, accession_name, &path.id, None)
620                        .expect("Accession could not be created.");
621                    AccessionPath::create(conn, &acc.id, &acc_edges);
622                }
623            }
624        }
625        Ok(())
626    }
627
628    #[allow(clippy::ptr_arg)]
629    #[allow(clippy::needless_late_init)]
630    pub fn insert_change(
631        conn: &GraphConnection,
632        change: &PathChange,
633        tree: &IntervalTree<i64, NodeIntervalBlock>,
634    ) -> Result<(), ChangeError> {
635        let new_augmented_edges = BlockGroup::set_up_new_edges(change, tree)?;
636        let new_edges = new_augmented_edges
637            .iter()
638            .map(|augmented_edge| augmented_edge.edge_data)
639            .collect::<Vec<_>>();
640        let edge_ids = Edge::bulk_create(conn, &new_edges);
641        let new_block_group_edges = edge_ids
642            .iter()
643            .enumerate()
644            .map(|(i, edge_id)| BlockGroupEdgeData {
645                block_group_id: change.block_group_id,
646                edge_id: *edge_id,
647                chromosome_index: new_augmented_edges[i].chromosome_index,
648                phased: new_augmented_edges[i].phased,
649            })
650            .collect::<Vec<_>>();
651        BlockGroupEdge::bulk_create(conn, &new_block_group_edges);
652        Ok(())
653    }
654
655    fn set_up_new_edges(
656        change: &PathChange,
657        tree: &IntervalTree<i64, NodeIntervalBlock>,
658    ) -> Result<Vec<AugmentedEdgeData>, ChangeError> {
659        let start_blocks: Vec<&NodeIntervalBlock> =
660            tree.query_point(change.start).map(|x| &x.value).collect();
661        assert_eq!(start_blocks.len(), 1);
662        // NOTE: This may not be used but needs to be initialized here instead of inside the if
663        // statement that uses it, so that the borrow checker is happy
664        let previous_start_blocks: Vec<&NodeIntervalBlock> = tree
665            .query_point(change.start - 1)
666            .map(|x| &x.value)
667            .collect();
668        assert_eq!(previous_start_blocks.len(), 1);
669        let start_block = if start_blocks[0].start == change.start {
670            // First part of this block will be replaced/deleted, need to get previous block to add
671            // edge including it
672            previous_start_blocks[0]
673        } else {
674            start_blocks[0]
675        };
676
677        // Ensure the change is within the path bounds. The logic here is a bit backwards, where
678        // we check if the start is before the start block's end and the end is before the end
679        // block's start. This is because the terminal blocks start and end at the bounds of the
680        // interval tree. So while it's ok to have a start/end block be the start/end block (for
681        // changes at the extremes, it's not ok for the change to start beyond the current
682        // boundaries.
683        if is_start_node(start_block.node_id) && change.start < start_block.end {
684            return Err(ChangeError::OutOfBounds(format!(
685                "Invalid change specified. Coordinate {pos} is before start of path range ({path_pos}).",
686                pos = change.start,
687                path_pos = start_block.end
688            )));
689        }
690
691        let end_blocks: Vec<&NodeIntervalBlock> =
692            tree.query_point(change.end).map(|x| &x.value).collect();
693        assert_eq!(end_blocks.len(), 1);
694        let end_block = end_blocks[0];
695
696        if is_end_node(end_block.node_id) && change.end > end_block.start {
697            return Err(ChangeError::OutOfBounds(format!(
698                "Invalid change specified. Coordinate {pos} is before start of path range ({path_pos}).",
699                pos = change.end,
700                path_pos = end_block.start
701            )));
702        }
703
704        let mut new_edges = vec![];
705
706        if change.block.sequence_start == change.block.sequence_end {
707            // Deletion
708            let source_coordinate = change.start - start_block.start + start_block.sequence_start;
709            let target_coordinate = change.end - end_block.start + end_block.sequence_start;
710            let mut aug_edges = vec![];
711            let new_edge = EdgeData {
712                source_node_id: start_block.node_id,
713                source_coordinate,
714                source_strand: Strand::Forward,
715                target_node_id: end_block.node_id,
716                target_coordinate,
717                target_strand: Strand::Forward,
718            };
719            aug_edges.push(AugmentedEdgeData {
720                edge_data: new_edge,
721                chromosome_index: change.chromosome_index,
722                phased: change.phased,
723            });
724
725            // NOTE: If the deletion is happening at the very beginning of a path, we need to add
726            // an edge from the dedicated start node to the end of the deletion, to indicate it's
727            // another start point in the block group DAG.
728            if change.start == 0 {
729                let target_coordinate = change.end - end_block.start + end_block.sequence_start;
730                let new_beginning_edge = EdgeData {
731                    source_node_id: PATH_START_NODE_ID,
732                    source_coordinate: 0,
733                    source_strand: Strand::Forward,
734                    target_node_id: end_block.node_id,
735                    target_coordinate,
736                    target_strand: Strand::Forward,
737                };
738                aug_edges.push(AugmentedEdgeData {
739                    edge_data: new_beginning_edge,
740                    chromosome_index: change.chromosome_index,
741                    phased: change.phased,
742                });
743                if !is_terminal(end_block.node_id) {
744                    new_edges.push(AugmentedEdgeData {
745                        edge_data: EdgeData {
746                            source_node_id: end_block.node_id,
747                            source_coordinate: target_coordinate,
748                            source_strand: Strand::Forward,
749                            target_node_id: end_block.node_id,
750                            target_coordinate,
751                            target_strand: Strand::Forward,
752                        },
753                        chromosome_index: if change.preserve_edge {
754                            0
755                        } else {
756                            PRESERVE_EDIT_SITE_CHROMOSOME_INDEX
757                        },
758                        phased: 0,
759                    });
760                }
761            } else {
762                if !is_terminal(start_block.node_id) {
763                    new_edges.push(AugmentedEdgeData {
764                        edge_data: EdgeData {
765                            source_node_id: start_block.node_id,
766                            source_coordinate,
767                            source_strand: Strand::Forward,
768                            target_node_id: start_block.node_id,
769                            target_coordinate: source_coordinate,
770                            target_strand: Strand::Forward,
771                        },
772                        chromosome_index: if change.preserve_edge {
773                            0
774                        } else {
775                            PRESERVE_EDIT_SITE_CHROMOSOME_INDEX
776                        },
777                        phased: 0,
778                    });
779                };
780                if !is_terminal(end_block.node_id) {
781                    new_edges.push(AugmentedEdgeData {
782                        edge_data: EdgeData {
783                            source_node_id: end_block.node_id,
784                            source_coordinate: target_coordinate,
785                            source_strand: Strand::Forward,
786                            target_node_id: end_block.node_id,
787                            target_coordinate,
788                            target_strand: Strand::Forward,
789                        },
790                        chromosome_index: if change.preserve_edge {
791                            0
792                        } else {
793                            PRESERVE_EDIT_SITE_CHROMOSOME_INDEX
794                        },
795                        phased: 0,
796                    });
797                }
798            }
799            new_edges.extend(aug_edges);
800        // NOTE: If the deletion is happening at the very end of a path, we might add an edge
801        // from the beginning of the deletion to the dedicated end node, but in practice it
802        // doesn't affect sequence readouts, so it may not be worth it.
803        } else {
804            // Insertion/replacement
805            let insertion_start_coordinate =
806                change.start - start_block.start + start_block.sequence_start;
807            let new_start_edge = EdgeData {
808                source_node_id: start_block.node_id,
809                source_coordinate: insertion_start_coordinate,
810                source_strand: Strand::Forward,
811                target_node_id: change.block.node_id,
812                target_coordinate: change.block.sequence_start,
813                target_strand: Strand::Forward,
814            };
815            let new_augmented_start_edge = AugmentedEdgeData {
816                edge_data: new_start_edge,
817                chromosome_index: change.chromosome_index,
818                phased: change.phased,
819            };
820            let insertion_end_coordinate = change.end - end_block.start + end_block.sequence_start;
821            let new_end_edge = EdgeData {
822                source_node_id: change.block.node_id,
823                source_coordinate: change.block.sequence_end,
824                source_strand: Strand::Forward,
825                target_node_id: end_block.node_id,
826                target_coordinate: insertion_end_coordinate,
827                target_strand: Strand::Forward,
828            };
829            let new_augmented_end_edge = AugmentedEdgeData {
830                edge_data: new_end_edge,
831                chromosome_index: change.chromosome_index,
832                phased: change.phased,
833            };
834
835            if change.start == 0 {
836                new_edges.push(AugmentedEdgeData {
837                    edge_data: EdgeData {
838                        source_node_id: PATH_START_NODE_ID,
839                        source_coordinate: 0,
840                        source_strand: Strand::Forward,
841                        target_node_id: change.block.node_id,
842                        target_coordinate: change.block.sequence_start,
843                        target_strand: Strand::Forward,
844                    },
845                    chromosome_index: change.chromosome_index,
846                    phased: 0,
847                });
848            }
849
850            if !is_terminal(start_block.node_id) {
851                new_edges.push(AugmentedEdgeData {
852                    edge_data: EdgeData {
853                        source_node_id: start_block.node_id,
854                        source_coordinate: insertion_start_coordinate,
855                        source_strand: Strand::Forward,
856                        target_node_id: start_block.node_id,
857                        target_coordinate: insertion_start_coordinate,
858                        target_strand: Strand::Forward,
859                    },
860                    chromosome_index: if change.preserve_edge {
861                        0
862                    } else {
863                        PRESERVE_EDIT_SITE_CHROMOSOME_INDEX
864                    },
865                    phased: 0,
866                });
867            }
868            if !is_terminal(end_block.node_id) {
869                new_edges.push(AugmentedEdgeData {
870                    edge_data: EdgeData {
871                        source_node_id: end_block.node_id,
872                        source_coordinate: insertion_end_coordinate,
873                        source_strand: Strand::Forward,
874                        target_node_id: end_block.node_id,
875                        target_coordinate: insertion_end_coordinate,
876                        target_strand: Strand::Forward,
877                    },
878                    chromosome_index: if change.preserve_edge {
879                        0
880                    } else {
881                        PRESERVE_EDIT_SITE_CHROMOSOME_INDEX
882                    },
883                    phased: 0,
884                });
885            }
886
887            new_edges.push(new_augmented_start_edge);
888            new_edges.push(new_augmented_end_edge);
889        }
890
891        Ok(new_edges)
892    }
893
894    pub fn intervaltree_for(
895        conn: &GraphConnection,
896        block_group_id: &HashId,
897        remove_ambiguous_positions: bool,
898    ) -> IntervalTree<i64, NodeIntervalBlock> {
899        // make a tree where every node has a span in the graph.
900        let mut graph = BlockGroup::get_graph(conn, block_group_id);
901        BlockGroup::prune_graph(&mut graph);
902        flatten_to_interval_tree(&graph, remove_ambiguous_positions)
903    }
904
905    pub fn get_current_path(conn: &GraphConnection, block_group_id: &HashId) -> Path {
906        let paths = Path::query(
907            conn,
908            "SELECT * FROM paths WHERE block_group_id = ?1 ORDER BY created_on DESC",
909            params![block_group_id],
910        );
911        paths[0].clone()
912    }
913
914    pub fn get_path_by_name(
915        conn: &GraphConnection,
916        block_group_id: &HashId,
917        path_name: &str,
918    ) -> Option<Path> {
919        let paths = Path::query(
920            conn,
921            "SELECT * FROM paths WHERE block_group_id = ?1 ORDER BY created_on DESC",
922            params![block_group_id],
923        );
924
925        for path in &paths {
926            if path.name == path_name {
927                return Some(path.clone());
928            }
929        }
930
931        None
932    }
933
934    #[allow(clippy::too_many_arguments)]
935    pub fn derive_subgraph(
936        conn: &GraphConnection,
937        collection_name: &str,
938        child_sample_name: &str,
939        source_block_group_id: &HashId,
940        start_block: &NodeIntervalBlock,
941        end_block: &NodeIntervalBlock,
942        start_node_coordinate: i64,
943        end_node_coordinate: i64,
944        target_block_group_id: &HashId,
945    ) -> HashMap<HashId, HashId> {
946        let current_graph = BlockGroup::get_graph(conn, source_block_group_id);
947        let start_node = current_graph
948            .nodes()
949            .find(|node| {
950                node.node_id == start_block.node_id
951                    && node.sequence_start <= start_node_coordinate
952                    && node.sequence_end >= start_node_coordinate
953            })
954            .unwrap();
955        let end_node = current_graph
956            .nodes()
957            .find(|node| {
958                node.node_id == end_block.node_id
959                    && node.sequence_start <= end_node_coordinate
960                    && node.sequence_end >= end_node_coordinate
961            })
962            .unwrap();
963        let subgraph_edges = all_intermediate_edges(&current_graph, start_node, end_node);
964
965        // Filter out internal edges (boundary edges) that don't exist in the database
966        let subgraph_edge_ids = subgraph_edges
967            .iter()
968            .map(|(_to, _from, edge_info)| edge_info[0].edge_id)
969            .collect::<Vec<_>>();
970        let source_edges = Edge::query_by_ids(conn, &subgraph_edge_ids);
971
972        // Instead of reusing the existing edges, we create copies of the nodes and edges.  This
973        // makes it easier to recombine subgraphs later.
974        // TODO: Annotation support. We do not reuse node ids because nodes control the graph
975        // topology. If we duplicated nodes via these operations, it would leave to unintentional
976        // cycles. A node cannot exist in 2 places. This will be non-intuitive for annotation
977        // propagation and needs to be handled.
978        let mut old_node_ids = HashSet::new();
979        for source_edge in &source_edges {
980            old_node_ids.insert(source_edge.source_node_id);
981            old_node_ids.insert(source_edge.target_node_id);
982        }
983
984        old_node_ids.insert(start_block.node_id);
985        old_node_ids.insert(end_block.node_id);
986
987        let old_nodes = Node::query_by_ids(conn, &old_node_ids);
988        let old_nodes_by_id = old_nodes
989            .iter()
990            .map(|node| (node.id, node))
991            .collect::<HashMap<_, _>>();
992
993        let mut new_node_ids_by_old = HashMap::new();
994        let old_start_node = old_nodes_by_id.get(&start_block.node_id).unwrap();
995        let new_start_node_hash = HashId(calculate_hash(&format!(
996            "{}.{}.{}.bg-{}",
997            collection_name, child_sample_name, old_start_node.id, target_block_group_id
998        )));
999        new_node_ids_by_old.insert(
1000            old_start_node.id,
1001            Node::create(conn, &old_start_node.sequence_hash, &new_start_node_hash),
1002        );
1003        let old_end_node = old_nodes_by_id.get(&end_block.node_id).unwrap();
1004        let new_end_node_hash = HashId(calculate_hash(&format!(
1005            "{}.{}.{}.bg-{}",
1006            collection_name, child_sample_name, old_end_node.id, target_block_group_id
1007        )));
1008        new_node_ids_by_old.insert(
1009            old_end_node.id,
1010            Node::create(conn, &old_end_node.sequence_hash, &new_end_node_hash),
1011        );
1012
1013        let mut new_edges = vec![];
1014        for source_edge in &source_edges {
1015            let new_source_node_id = *new_node_ids_by_old
1016                .entry(source_edge.source_node_id)
1017                .or_insert_with(|| {
1018                    let old_source_node = old_nodes_by_id.get(&source_edge.source_node_id).unwrap();
1019                    let new_source_node_hash = HashId(calculate_hash(&format!(
1020                        "{}.{}.{}.bg-{}",
1021                        collection_name,
1022                        child_sample_name,
1023                        old_source_node.id,
1024                        target_block_group_id
1025                    )));
1026                    Node::create(conn, &old_source_node.sequence_hash, &new_source_node_hash)
1027                });
1028            let new_target_node_id = *new_node_ids_by_old
1029                .entry(source_edge.target_node_id)
1030                .or_insert_with(|| {
1031                    let old_target_node = old_nodes_by_id.get(&source_edge.target_node_id).unwrap();
1032                    let new_target_node_hash = HashId(calculate_hash(&format!(
1033                        "{}.{}.{}.bg-{}",
1034                        collection_name,
1035                        child_sample_name,
1036                        old_target_node.id,
1037                        target_block_group_id
1038                    )));
1039                    Node::create(conn, &old_target_node.sequence_hash, &new_target_node_hash)
1040                });
1041
1042            new_edges.push(EdgeData {
1043                source_node_id: new_source_node_id,
1044                source_coordinate: source_edge.source_coordinate,
1045                source_strand: source_edge.source_strand,
1046                target_node_id: new_target_node_id,
1047                target_coordinate: source_edge.target_coordinate,
1048                target_strand: source_edge.target_strand,
1049            });
1050        }
1051
1052        let new_edge_ids = Edge::bulk_create(conn, &new_edges);
1053        let new_edge_ids_by_old = source_edges
1054            .iter()
1055            .zip(new_edge_ids.iter())
1056            .map(|(source_edge, new_edge_id)| (&source_edge.id, new_edge_id))
1057            .collect::<HashMap<_, _>>();
1058
1059        let source_block_group_edges = BlockGroupEdge::specific_edges_for_block_group(
1060            conn,
1061            source_block_group_id,
1062            &subgraph_edge_ids,
1063        );
1064        let source_edge_ids = source_edges
1065            .iter()
1066            .map(|edge| &edge.id)
1067            .collect::<HashSet<_>>();
1068        let source_block_group_edges = source_block_group_edges
1069            .iter()
1070            .filter(|block_group_edge| source_edge_ids.contains(&block_group_edge.edge.id))
1071            .collect::<Vec<_>>();
1072        let source_block_group_edges_by_edge_id = source_block_group_edges
1073            .iter()
1074            .map(|block_group_edge| (&block_group_edge.edge.id, block_group_edge))
1075            .collect::<HashMap<_, _>>();
1076
1077        let subgraph_edge_inputs = source_block_group_edges
1078            .iter()
1079            .map(|edge| {
1080                let block_group_edge = source_block_group_edges_by_edge_id
1081                    .get(&edge.edge.id)
1082                    .unwrap();
1083                let new_edge_id = new_edge_ids_by_old.get(&edge.edge.id).unwrap();
1084                BlockGroupEdgeData {
1085                    block_group_id: *target_block_group_id,
1086                    edge_id: **new_edge_id,
1087                    chromosome_index: block_group_edge.chromosome_index,
1088                    phased: block_group_edge.phased,
1089                }
1090            })
1091            .collect::<Vec<_>>();
1092
1093        let new_start_node_id = new_node_ids_by_old.get(&start_block.node_id).unwrap();
1094        let new_start_edge = Edge::create(
1095            conn,
1096            PATH_START_NODE_ID,
1097            0,
1098            Strand::Forward,
1099            *new_start_node_id,
1100            start_node_coordinate,
1101            start_block.strand,
1102        );
1103        let new_start_edge_data = BlockGroupEdgeData {
1104            block_group_id: *target_block_group_id,
1105            edge_id: new_start_edge.id,
1106            chromosome_index: 0,
1107            phased: 0,
1108        };
1109        let new_end_node_id = new_node_ids_by_old.get(&end_block.node_id).unwrap();
1110        let new_end_edge = Edge::create(
1111            conn,
1112            *new_end_node_id,
1113            end_node_coordinate,
1114            end_block.strand,
1115            PATH_END_NODE_ID,
1116            0,
1117            Strand::Forward,
1118        );
1119        let new_end_edge_data = BlockGroupEdgeData {
1120            block_group_id: *target_block_group_id,
1121            edge_id: new_end_edge.id,
1122            chromosome_index: 0,
1123            phased: 0,
1124        };
1125        let mut all_edges = subgraph_edge_inputs.clone();
1126        all_edges.push(new_start_edge_data);
1127        all_edges.push(new_end_edge_data);
1128        BlockGroupEdge::bulk_create(conn, &all_edges);
1129
1130        new_node_ids_by_old
1131    }
1132}
1133
1134impl Query for BlockGroup {
1135    type Model = BlockGroup;
1136
1137    const TABLE_NAME: &'static str = "block_groups";
1138
1139    fn process_row(row: &Row) -> Self::Model {
1140        BlockGroup {
1141            id: row.get(0).unwrap(),
1142            collection_name: row.get(1).unwrap(),
1143            sample_name: row.get(2).unwrap(),
1144            name: row.get(3).unwrap(),
1145            created_on: row.get(4).unwrap(),
1146        }
1147    }
1148}
1149
1150#[cfg(test)]
1151mod tests {
1152    use core::ops::Range;
1153
1154    use capnp::message::TypedBuilder;
1155    use chrono::Utc;
1156    use gen_core::NO_CHROMOSOME_INDEX;
1157
1158    use super::*;
1159    use crate::{
1160        collection::Collection,
1161        node::Node,
1162        sample::Sample,
1163        sequence::Sequence,
1164        test_helpers::{get_connection, interval_tree_verify, setup_block_group},
1165    };
1166
1167    #[test]
1168    fn test_capnp_serialization() {
1169        let block_group = BlockGroup {
1170            id: HashId::pad_str(42),
1171            collection_name: "test_collection".to_string(),
1172            sample_name: Some("test_sample".to_string()),
1173            name: "test_block_group".to_string(),
1174            created_on: Utc::now().timestamp_nanos_opt().unwrap(),
1175        };
1176
1177        let mut message = TypedBuilder::<block_group::Owned>::new_default();
1178        let mut root = message.init_root();
1179        block_group.write_capnp(&mut root);
1180
1181        let deserialized = BlockGroup::read_capnp(root.into_reader());
1182        assert_eq!(block_group, deserialized);
1183    }
1184
1185    #[test]
1186    fn test_capnp_serialization_no_sample() {
1187        let block_group = BlockGroup {
1188            id: HashId::pad_str(43),
1189            collection_name: "test_collection_2".to_string(),
1190            sample_name: None,
1191            name: "test_block_group_2".to_string(),
1192            created_on: Utc::now().timestamp_nanos_opt().unwrap(),
1193        };
1194
1195        let mut message = TypedBuilder::<block_group::Owned>::new_default();
1196        let mut root = message.init_root();
1197        block_group.write_capnp(&mut root);
1198
1199        let deserialized = BlockGroup::read_capnp(root.into_reader());
1200        assert_eq!(block_group, deserialized);
1201    }
1202
1203    #[test]
1204    fn test_blockgroup_create() {
1205        let conn = &get_connection(None).unwrap();
1206        Collection::create(conn, "test");
1207        let bg1 = BlockGroup::create(conn, "test", None, "hg19");
1208        assert_eq!(bg1.collection_name, "test");
1209        assert_eq!(bg1.name, "hg19");
1210        Sample::get_or_create(conn, "sample");
1211        let bg2 = BlockGroup::create(conn, "test", Some("sample"), "hg19");
1212        assert_eq!(bg2.collection_name, "test");
1213        assert_eq!(bg2.name, "hg19");
1214        assert_eq!(bg2.sample_name, Some("sample".to_string()));
1215        assert_ne!(&bg1.id, &bg2.id);
1216    }
1217
1218    #[test]
1219    fn test_blockgroup_delete_without_sample() {
1220        let conn = &get_connection(None).unwrap();
1221        Collection::create(conn, "test");
1222        let bg1 = BlockGroup::create(conn, "test", None, "hg19");
1223        let bg2 = BlockGroup::create(conn, "test", None, "hg38");
1224
1225        BlockGroup::delete(conn, "test", None, &bg1.name);
1226
1227        let bgs = BlockGroup::all(conn);
1228        assert_eq!(bgs.len(), 1);
1229        assert_eq!(bgs[0], bg2);
1230    }
1231
1232    #[test]
1233    fn test_blockgroup_delete_with_sample() {
1234        let conn = &get_connection(None).unwrap();
1235        Collection::create(conn, "test");
1236        Sample::get_or_create(conn, "sample1");
1237        Sample::get_or_create(conn, "sample2");
1238        let bg1 = BlockGroup::create(conn, "test", Some("sample1"), "hg19");
1239        let bg2 = BlockGroup::create(conn, "test", Some("sample2"), "hg19");
1240
1241        BlockGroup::delete(conn, "test", bg1.sample_name.as_deref(), &bg1.name);
1242
1243        let bgs = BlockGroup::all(conn);
1244        assert_eq!(bgs.len(), 1);
1245        assert_eq!(bgs[0], bg2);
1246    }
1247
1248    #[test]
1249    fn test_blockgroup_clone() {
1250        let conn = &get_connection(None).unwrap();
1251        Collection::create(conn, "test");
1252        let bg1 = BlockGroup::create(conn, "test", None, "hg19");
1253        assert_eq!(bg1.collection_name, "test");
1254        assert_eq!(bg1.name, "hg19");
1255        Sample::get_or_create(conn, "sample");
1256        let bg2 =
1257            BlockGroup::get_or_create_sample_block_group(conn, "test", "sample", "hg19", None)
1258                .unwrap();
1259        assert_eq!(
1260            BlockGroupEdge::edges_for_block_group(conn, &bg1.id),
1261            BlockGroupEdge::edges_for_block_group(conn, &bg2)
1262        );
1263    }
1264
1265    #[test]
1266    fn test_blockgroup_clone_passes_accessions() {
1267        let conn = &get_connection(None).unwrap();
1268        let (bg_1, path) = setup_block_group(conn);
1269        let mut path_cache = PathCache::new(conn);
1270        PathCache::lookup(&mut path_cache, &bg_1, path.name.clone());
1271        let acc_1 = BlockGroup::add_accession(conn, &path, "test", 3, 7, &mut path_cache);
1272        assert_eq!(
1273            Accession::query(
1274                conn,
1275                "select * from accessions where name = ?1",
1276                rusqlite::params!(SQLValue::from("test".to_string())),
1277            ),
1278            vec![Accession {
1279                id: acc_1.id,
1280                name: "test".to_string(),
1281                path_id: path.id,
1282                parent_accession_id: None,
1283            }]
1284        );
1285
1286        Sample::get_or_create(conn, "sample2");
1287        let _bg2 =
1288            BlockGroup::get_or_create_sample_block_group(conn, "test", "sample2", "chr1", None)
1289                .unwrap();
1290        assert_eq!(
1291            Accession::query(
1292                conn,
1293                "select * from accessions where name = ?1",
1294                rusqlite::params!(SQLValue::from("test".to_string())),
1295            )
1296            .len(),
1297            2
1298        );
1299    }
1300
1301    #[test]
1302    fn insert_and_deletion_get_all() {
1303        let conn = get_connection(None).unwrap();
1304        let (block_group_id, path) = setup_block_group(&conn);
1305        let insert_sequence = Sequence::new()
1306            .sequence_type("DNA")
1307            .sequence("NNNN")
1308            .save(&conn);
1309        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1310        let insert = PathBlock {
1311            id: 0,
1312            node_id: insert_node_id,
1313            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1314            sequence_start: 0,
1315            sequence_end: 4,
1316            path_start: 7,
1317            path_end: 15,
1318            strand: Strand::Forward,
1319        };
1320        let change = PathChange {
1321            block_group_id,
1322            path: path.clone(),
1323            path_accession: None,
1324            start: 7,
1325            end: 15,
1326            block: insert,
1327            chromosome_index: 1,
1328            phased: 0,
1329            preserve_edge: true,
1330        };
1331        let tree = path.intervaltree(&conn);
1332        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1333
1334        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1335        assert_eq!(
1336            all_sequences,
1337            HashSet::from_iter(vec![
1338                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1339                "AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
1340            ])
1341        );
1342
1343        let deletion_sequence = Sequence::new()
1344            .sequence_type("DNA")
1345            .sequence("")
1346            .save(&conn);
1347        let deletion_node_id =
1348            Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("2"));
1349        let deletion = PathBlock {
1350            id: 0,
1351            node_id: deletion_node_id,
1352            block_sequence: deletion_sequence.get_sequence(None, None),
1353            sequence_start: 0,
1354            sequence_end: 0,
1355            path_start: 19,
1356            path_end: 31,
1357            strand: Strand::Forward,
1358        };
1359
1360        let change = PathChange {
1361            block_group_id,
1362            path: path.clone(),
1363            path_accession: None,
1364            start: 19,
1365            end: 31,
1366            block: deletion,
1367            chromosome_index: 1,
1368            phased: 0,
1369            preserve_edge: true,
1370        };
1371        // take out an entire block.
1372        let tree = path.intervaltree(&conn);
1373        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1374        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1375        assert_eq!(
1376            all_sequences,
1377            HashSet::from_iter(vec![
1378                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1379                "AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1380                "AAAAAAAAAATTTTTTTTTGGGGGGGGG".to_string(),
1381                "AAAAAAANNNNTTTTGGGGGGGGG".to_string(),
1382            ])
1383        )
1384    }
1385
1386    #[test]
1387    fn simple_insert_get_all() {
1388        let conn = get_connection(None).unwrap();
1389        let (block_group_id, path) = setup_block_group(&conn);
1390        let insert_sequence = Sequence::new()
1391            .sequence_type("DNA")
1392            .sequence("NNNN")
1393            .save(&conn);
1394        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1395        let insert = PathBlock {
1396            id: 0,
1397            node_id: insert_node_id,
1398            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1399            sequence_start: 0,
1400            sequence_end: 4,
1401            path_start: 7,
1402            path_end: 15,
1403            strand: Strand::Forward,
1404        };
1405        let change = PathChange {
1406            block_group_id,
1407            path: path.clone(),
1408            path_accession: None,
1409            start: 7,
1410            end: 15,
1411            block: insert,
1412            chromosome_index: 1,
1413            phased: 0,
1414            preserve_edge: true,
1415        };
1416        let tree = path.intervaltree(&conn);
1417        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1418
1419        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1420        assert_eq!(
1421            all_sequences,
1422            HashSet::from_iter(vec![
1423                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1424                "AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
1425            ])
1426        );
1427    }
1428
1429    #[test]
1430    fn insert_on_block_boundary_middle() {
1431        let conn = get_connection(None).unwrap();
1432        let (block_group_id, path) = setup_block_group(&conn);
1433        let insert_sequence = Sequence::new()
1434            .sequence_type("DNA")
1435            .sequence("NNNN")
1436            .save(&conn);
1437        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1438        let insert = PathBlock {
1439            id: 0,
1440            node_id: insert_node_id,
1441            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1442            sequence_start: 0,
1443            sequence_end: 4,
1444            path_start: 15,
1445            path_end: 15,
1446            strand: Strand::Forward,
1447        };
1448        let change = PathChange {
1449            block_group_id,
1450            path: path.clone(),
1451            path_accession: None,
1452            start: 15,
1453            end: 15,
1454            block: insert,
1455            chromosome_index: 1,
1456            phased: 0,
1457            preserve_edge: true,
1458        };
1459        let tree = path.intervaltree(&conn);
1460        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1461
1462        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1463        assert_eq!(
1464            all_sequences,
1465            HashSet::from_iter(vec![
1466                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1467                "AAAAAAAAAATTTTTNNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
1468            ])
1469        );
1470    }
1471
1472    #[test]
1473    fn insert_within_block() {
1474        let conn = get_connection(None).unwrap();
1475        let (block_group_id, path) = setup_block_group(&conn);
1476        let insert_sequence = Sequence::new()
1477            .sequence_type("DNA")
1478            .sequence("NNNN")
1479            .save(&conn);
1480        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1481        let insert = PathBlock {
1482            id: 0,
1483            node_id: insert_node_id,
1484            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1485            sequence_start: 0,
1486            sequence_end: 4,
1487            path_start: 12,
1488            path_end: 17,
1489            strand: Strand::Forward,
1490        };
1491        let change = PathChange {
1492            block_group_id,
1493            path: path.clone(),
1494            path_accession: None,
1495            start: 12,
1496            end: 17,
1497            block: insert,
1498            chromosome_index: 1,
1499            phased: 0,
1500            preserve_edge: true,
1501        };
1502        let tree = path.intervaltree(&conn);
1503        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1504
1505        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1506        assert_eq!(
1507            all_sequences,
1508            HashSet::from_iter(vec![
1509                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1510                "AAAAAAAAAATTNNNNTTTCCCCCCCCCCGGGGGGGGGG".to_string()
1511            ])
1512        );
1513    }
1514
1515    #[test]
1516    fn insert_on_block_boundary_start() {
1517        let conn = get_connection(None).unwrap();
1518        let (block_group_id, path) = setup_block_group(&conn);
1519        let insert_sequence = Sequence::new()
1520            .sequence_type("DNA")
1521            .sequence("NNNN")
1522            .save(&conn);
1523        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1524        let insert = PathBlock {
1525            id: 0,
1526            node_id: insert_node_id,
1527            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1528            sequence_start: 0,
1529            sequence_end: 4,
1530            path_start: 10,
1531            path_end: 10,
1532            strand: Strand::Forward,
1533        };
1534        let change = PathChange {
1535            block_group_id,
1536            path: path.clone(),
1537            path_accession: None,
1538            start: 10,
1539            end: 10,
1540            block: insert,
1541            chromosome_index: 1,
1542            phased: 0,
1543            preserve_edge: true,
1544        };
1545        let tree = path.intervaltree(&conn);
1546        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1547
1548        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1549        assert_eq!(
1550            all_sequences,
1551            HashSet::from_iter(vec![
1552                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1553                "AAAAAAAAAANNNNTTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
1554            ])
1555        );
1556    }
1557
1558    #[test]
1559    fn insert_on_block_boundary_end() {
1560        let conn = get_connection(None).unwrap();
1561        let (block_group_id, path) = setup_block_group(&conn);
1562        let insert_sequence = Sequence::new()
1563            .sequence_type("DNA")
1564            .sequence("NNNN")
1565            .save(&conn);
1566        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1567        let insert = PathBlock {
1568            id: 0,
1569            node_id: insert_node_id,
1570            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1571            sequence_start: 0,
1572            sequence_end: 4,
1573            path_start: 9,
1574            path_end: 9,
1575            strand: Strand::Forward,
1576        };
1577        let change = PathChange {
1578            block_group_id,
1579            path: path.clone(),
1580            path_accession: None,
1581            start: 9,
1582            end: 9,
1583            block: insert,
1584            chromosome_index: 1,
1585            phased: 0,
1586            preserve_edge: true,
1587        };
1588        let tree = path.intervaltree(&conn);
1589        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1590
1591        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1592        assert_eq!(
1593            all_sequences,
1594            HashSet::from_iter(vec![
1595                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1596                "AAAAAAAAANNNNATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
1597            ])
1598        );
1599    }
1600
1601    #[test]
1602    fn insert_across_entire_block_boundary() {
1603        let conn = get_connection(None).unwrap();
1604        let (block_group_id, path) = setup_block_group(&conn);
1605        let insert_sequence = Sequence::new()
1606            .sequence_type("DNA")
1607            .sequence("NNNN")
1608            .save(&conn);
1609        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1610        let insert = PathBlock {
1611            id: 0,
1612            node_id: insert_node_id,
1613            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1614            sequence_start: 0,
1615            sequence_end: 4,
1616            path_start: 10,
1617            path_end: 20,
1618            strand: Strand::Forward,
1619        };
1620        let change = PathChange {
1621            block_group_id,
1622            path: path.clone(),
1623            path_accession: None,
1624            start: 10,
1625            end: 20,
1626            block: insert,
1627            chromosome_index: 1,
1628            phased: 0,
1629            preserve_edge: true,
1630        };
1631        let tree = path.intervaltree(&conn);
1632        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1633
1634        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1635        assert_eq!(
1636            all_sequences,
1637            HashSet::from_iter(vec![
1638                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1639                "AAAAAAAAAANNNNCCCCCCCCCCGGGGGGGGGG".to_string()
1640            ])
1641        );
1642    }
1643
1644    #[test]
1645    fn insert_across_two_blocks() {
1646        let conn = get_connection(None).unwrap();
1647        let (block_group_id, path) = setup_block_group(&conn);
1648        let insert_sequence = Sequence::new()
1649            .sequence_type("DNA")
1650            .sequence("NNNN")
1651            .save(&conn);
1652        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1653        let insert = PathBlock {
1654            id: 0,
1655            node_id: insert_node_id,
1656            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1657            sequence_start: 0,
1658            sequence_end: 4,
1659            path_start: 15,
1660            path_end: 25,
1661            strand: Strand::Forward,
1662        };
1663        let change = PathChange {
1664            block_group_id,
1665            path: path.clone(),
1666            path_accession: None,
1667            start: 15,
1668            end: 25,
1669            block: insert,
1670            chromosome_index: 1,
1671            phased: 0,
1672            preserve_edge: true,
1673        };
1674        let tree = path.intervaltree(&conn);
1675        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1676
1677        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1678        assert_eq!(
1679            all_sequences,
1680            HashSet::from_iter(vec![
1681                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1682                "AAAAAAAAAATTTTTNNNNCCCCCGGGGGGGGGG".to_string()
1683            ])
1684        );
1685    }
1686
1687    #[test]
1688    fn insert_spanning_blocks() {
1689        let conn = get_connection(None).unwrap();
1690        let (block_group_id, path) = setup_block_group(&conn);
1691        let insert_sequence = Sequence::new()
1692            .sequence_type("DNA")
1693            .sequence("NNNN")
1694            .save(&conn);
1695        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1696        let insert = PathBlock {
1697            id: 0,
1698            node_id: insert_node_id,
1699            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1700            sequence_start: 0,
1701            sequence_end: 4,
1702            path_start: 5,
1703            path_end: 35,
1704            strand: Strand::Forward,
1705        };
1706        let change = PathChange {
1707            block_group_id,
1708            path: path.clone(),
1709            path_accession: None,
1710            start: 5,
1711            end: 35,
1712            block: insert,
1713            chromosome_index: 1,
1714            phased: 0,
1715            preserve_edge: true,
1716        };
1717        let tree = path.intervaltree(&conn);
1718        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1719
1720        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1721        assert_eq!(
1722            all_sequences,
1723            HashSet::from_iter(vec![
1724                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1725                "AAAAANNNNGGGGG".to_string()
1726            ])
1727        );
1728    }
1729
1730    #[test]
1731    fn simple_deletion() {
1732        let conn = get_connection(None).unwrap();
1733        let (block_group_id, path) = setup_block_group(&conn);
1734        let deletion_sequence = Sequence::new()
1735            .sequence_type("DNA")
1736            .sequence("")
1737            .save(&conn);
1738        let deletion_node_id =
1739            Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("1"));
1740        let deletion = PathBlock {
1741            id: 0,
1742            node_id: deletion_node_id,
1743            block_sequence: deletion_sequence.get_sequence(None, None),
1744            sequence_start: 0,
1745            sequence_end: 0,
1746            path_start: 19,
1747            path_end: 31,
1748            strand: Strand::Forward,
1749        };
1750
1751        let change = PathChange {
1752            block_group_id,
1753            path: path.clone(),
1754            path_accession: None,
1755            start: 19,
1756            end: 31,
1757            block: deletion,
1758            chromosome_index: 1,
1759            phased: 0,
1760            preserve_edge: true,
1761        };
1762
1763        // take out an entire block.
1764        let tree = path.intervaltree(&conn);
1765        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1766        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1767        assert_eq!(
1768            all_sequences,
1769            HashSet::from_iter(vec![
1770                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1771                "AAAAAAAAAATTTTTTTTTGGGGGGGGG".to_string(),
1772            ])
1773        )
1774    }
1775
1776    #[test]
1777    fn doesnt_apply_same_insert_twice() {
1778        let conn = get_connection(None).unwrap();
1779        let (block_group_id, path) = setup_block_group(&conn);
1780        let insert_sequence = Sequence::new()
1781            .sequence_type("DNA")
1782            .sequence("NNNN")
1783            .save(&conn);
1784        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1785        let insert = PathBlock {
1786            id: 0,
1787            node_id: insert_node_id,
1788            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1789            sequence_start: 0,
1790            sequence_end: 4,
1791            path_start: 7,
1792            path_end: 15,
1793            strand: Strand::Forward,
1794        };
1795        let change = PathChange {
1796            block_group_id,
1797            path: path.clone(),
1798            path_accession: None,
1799            start: 7,
1800            end: 15,
1801            block: insert,
1802            chromosome_index: 1,
1803            phased: 0,
1804            preserve_edge: true,
1805        };
1806        let tree = path.intervaltree(&conn);
1807        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1808
1809        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1810        assert_eq!(
1811            all_sequences,
1812            HashSet::from_iter(vec![
1813                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1814                "AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
1815            ])
1816        );
1817
1818        let tree = path.intervaltree(&conn);
1819        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1820
1821        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1822        assert_eq!(
1823            all_sequences,
1824            HashSet::from_iter(vec![
1825                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1826                "AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
1827            ])
1828        );
1829    }
1830
1831    #[test]
1832    fn insert_at_beginning_of_path() {
1833        let conn = get_connection(None).unwrap();
1834        let (block_group_id, path) = setup_block_group(&conn);
1835        let insert_sequence = Sequence::new()
1836            .sequence_type("DNA")
1837            .sequence("NNNN")
1838            .save(&conn);
1839        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1840        let insert = PathBlock {
1841            id: 0,
1842            node_id: insert_node_id,
1843            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1844            sequence_start: 0,
1845            sequence_end: 4,
1846            path_start: 0,
1847            path_end: 0,
1848            strand: Strand::Forward,
1849        };
1850        let change = PathChange {
1851            block_group_id,
1852            path: path.clone(),
1853            path_accession: None,
1854            start: 0,
1855            end: 0,
1856            block: insert,
1857            chromosome_index: 1,
1858            phased: 0,
1859            preserve_edge: true,
1860        };
1861        let tree = path.intervaltree(&conn);
1862        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1863
1864        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1865        assert_eq!(
1866            all_sequences,
1867            HashSet::from_iter(vec![
1868                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1869                "NNNNAAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1870            ])
1871        );
1872    }
1873
1874    #[test]
1875    fn homozygous_insert_at_beginning_of_path() {
1876        let conn = get_connection(None).unwrap();
1877        let (block_group_id, path) = setup_block_group(&conn);
1878        let insert_sequence = Sequence::new()
1879            .sequence_type("DNA")
1880            .sequence("NNNN")
1881            .save(&conn);
1882        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1883        let insert = PathBlock {
1884            id: 0,
1885            node_id: insert_node_id,
1886            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1887            sequence_start: 0,
1888            sequence_end: 4,
1889            path_start: 0,
1890            path_end: 0,
1891            strand: Strand::Forward,
1892        };
1893        let change = PathChange {
1894            block_group_id,
1895            path: path.clone(),
1896            path_accession: None,
1897            start: 0,
1898            end: 0,
1899            block: insert,
1900            chromosome_index: 0,
1901            phased: 0,
1902            preserve_edge: true,
1903        };
1904        let tree = path.intervaltree(&conn);
1905        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1906
1907        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1908        assert_eq!(
1909            all_sequences,
1910            HashSet::from_iter(vec![
1911                "NNNNAAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1912            ])
1913        );
1914    }
1915
1916    #[test]
1917    fn insert_at_end_of_path() {
1918        let conn = get_connection(None).unwrap();
1919        let (block_group_id, path) = setup_block_group(&conn);
1920
1921        let insert_sequence = Sequence::new()
1922            .sequence_type("DNA")
1923            .sequence("NNNN")
1924            .save(&conn);
1925        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1926        let insert = PathBlock {
1927            id: 0,
1928            node_id: insert_node_id,
1929            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1930            sequence_start: 0,
1931            sequence_end: 4,
1932            path_start: 40,
1933            path_end: 40,
1934            strand: Strand::Forward,
1935        };
1936        let change = PathChange {
1937            block_group_id,
1938            path: path.clone(),
1939            path_accession: None,
1940            start: 40,
1941            end: 40,
1942            block: insert,
1943            chromosome_index: 1,
1944            phased: 0,
1945            preserve_edge: true,
1946        };
1947        let tree = path.intervaltree(&conn);
1948        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1949
1950        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1951        assert_eq!(
1952            all_sequences,
1953            HashSet::from_iter(vec![
1954                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1955                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGNNNN".to_string(),
1956            ])
1957        );
1958    }
1959
1960    #[test]
1961    fn insert_at_one_bp_into_block() {
1962        let conn = get_connection(None).unwrap();
1963        let (block_group_id, path) = setup_block_group(&conn);
1964        let insert_sequence = Sequence::new()
1965            .sequence_type("DNA")
1966            .sequence("NNNN")
1967            .save(&conn);
1968        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1969        let insert = PathBlock {
1970            id: 0,
1971            node_id: insert_node_id,
1972            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1973            sequence_start: 0,
1974            sequence_end: 4,
1975            path_start: 10,
1976            path_end: 11,
1977            strand: Strand::Forward,
1978        };
1979        let change = PathChange {
1980            block_group_id,
1981            path: path.clone(),
1982            path_accession: None,
1983            start: 10,
1984            end: 11,
1985            block: insert,
1986            chromosome_index: 1,
1987            phased: 0,
1988            preserve_edge: true,
1989        };
1990        let tree = path.intervaltree(&conn);
1991        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1992
1993        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1994        assert_eq!(
1995            all_sequences,
1996            HashSet::from_iter(vec![
1997                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1998                "AAAAAAAAAANNNNTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1999            ])
2000        );
2001    }
2002
2003    #[test]
2004    fn insert_at_one_bp_from_end_of_block() {
2005        let conn = get_connection(None).unwrap();
2006        let (block_group_id, path) = setup_block_group(&conn);
2007        let insert_sequence = Sequence::new()
2008            .sequence_type("DNA")
2009            .sequence("NNNN")
2010            .save(&conn);
2011        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2012        let insert = PathBlock {
2013            id: 0,
2014            node_id: insert_node_id,
2015            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2016            sequence_start: 0,
2017            sequence_end: 4,
2018            path_start: 19,
2019            path_end: 20,
2020            strand: Strand::Forward,
2021        };
2022        let change = PathChange {
2023            block_group_id,
2024            path: path.clone(),
2025            path_accession: None,
2026            start: 19,
2027            end: 20,
2028            block: insert,
2029            chromosome_index: 1,
2030            phased: 0,
2031            preserve_edge: true,
2032        };
2033        let tree = path.intervaltree(&conn);
2034        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2035
2036        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2037        assert_eq!(
2038            all_sequences,
2039            HashSet::from_iter(vec![
2040                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2041                "AAAAAAAAAATTTTTTTTTNNNNCCCCCCCCCCGGGGGGGGGG".to_string(),
2042            ])
2043        );
2044    }
2045
2046    #[test]
2047    fn delete_at_beginning_of_path() {
2048        let conn = get_connection(None).unwrap();
2049        let (block_group_id, path) = setup_block_group(&conn);
2050        let deletion_sequence = Sequence::new()
2051            .sequence_type("DNA")
2052            .sequence("")
2053            .save(&conn);
2054        let deletion_node_id =
2055            Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("1"));
2056        let deletion = PathBlock {
2057            id: 0,
2058            node_id: deletion_node_id,
2059            block_sequence: deletion_sequence.get_sequence(None, None),
2060            sequence_start: 0,
2061            sequence_end: 0,
2062            path_start: 0,
2063            path_end: 1,
2064            strand: Strand::Forward,
2065        };
2066        let change = PathChange {
2067            block_group_id,
2068            path: path.clone(),
2069            path_accession: None,
2070            start: 0,
2071            end: 1,
2072            block: deletion,
2073            chromosome_index: 1,
2074            phased: 0,
2075            preserve_edge: true,
2076        };
2077        let tree = path.intervaltree(&conn);
2078        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2079
2080        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2081        assert_eq!(
2082            all_sequences,
2083            HashSet::from_iter(vec![
2084                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2085                "AAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2086            ])
2087        );
2088    }
2089
2090    #[test]
2091    fn delete_at_end_of_path() {
2092        let conn = get_connection(None).unwrap();
2093        let (block_group_id, path) = setup_block_group(&conn);
2094        let deletion_sequence = Sequence::new()
2095            .sequence_type("DNA")
2096            .sequence("")
2097            .save(&conn);
2098        let deletion_node_id =
2099            Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("1"));
2100        let deletion = PathBlock {
2101            id: 0,
2102            node_id: deletion_node_id,
2103            block_sequence: deletion_sequence.get_sequence(None, None),
2104            sequence_start: 0,
2105            sequence_end: 0,
2106            path_start: 35,
2107            path_end: 40,
2108            strand: Strand::Forward,
2109        };
2110        let change = PathChange {
2111            block_group_id,
2112            path: path.clone(),
2113            path_accession: None,
2114            start: 35,
2115            end: 40,
2116            block: deletion,
2117            chromosome_index: 1,
2118            phased: 0,
2119            preserve_edge: true,
2120        };
2121        let tree = path.intervaltree(&conn);
2122        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2123
2124        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2125        assert_eq!(
2126            all_sequences,
2127            HashSet::from_iter(vec![
2128                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGG".to_string(),
2129                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2130            ])
2131        );
2132    }
2133
2134    #[test]
2135    fn error_on_out_of_bounds_change() {
2136        let conn = get_connection(None).unwrap();
2137        let (block_group_id, path) = setup_block_group(&conn);
2138        let deletion_sequence = Sequence::new()
2139            .sequence_type("DNA")
2140            .sequence("")
2141            .save(&conn);
2142        let deletion_node_id =
2143            Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("1"));
2144        let deletion = PathBlock {
2145            id: 0,
2146            node_id: deletion_node_id,
2147            block_sequence: deletion_sequence.get_sequence(None, None),
2148            sequence_start: 0,
2149            sequence_end: 0,
2150            path_start: 350,
2151            path_end: 400,
2152            strand: Strand::Forward,
2153        };
2154        let after_end_change = PathChange {
2155            block_group_id,
2156            path: path.clone(),
2157            path_accession: None,
2158            start: 350,
2159            end: 400,
2160            block: deletion.clone(),
2161            chromosome_index: 1,
2162            phased: 0,
2163            preserve_edge: true,
2164        };
2165        let before_start_change = PathChange {
2166            block_group_id,
2167            path: path.clone(),
2168            path_accession: None,
2169            start: -300,
2170            end: 400,
2171            block: deletion,
2172            chromosome_index: 1,
2173            phased: 0,
2174            preserve_edge: true,
2175        };
2176        let tree = path.intervaltree(&conn);
2177        let res = BlockGroup::insert_change(&conn, &after_end_change, &tree);
2178        assert!(matches!(res, Err(ChangeError::OutOfBounds(_))));
2179        let res = BlockGroup::insert_change(&conn, &before_start_change, &tree);
2180        assert!(matches!(res, Err(ChangeError::OutOfBounds(_))));
2181    }
2182
2183    #[test]
2184    fn deletion_starting_at_block_boundary() {
2185        let conn = get_connection(None).unwrap();
2186        let (block_group_id, path) = setup_block_group(&conn);
2187        let deletion_sequence = Sequence::new()
2188            .sequence_type("DNA")
2189            .sequence("")
2190            .save(&conn);
2191        let deletion_node_id =
2192            Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("1"));
2193        let deletion = PathBlock {
2194            id: 0,
2195            node_id: deletion_node_id,
2196            block_sequence: deletion_sequence.get_sequence(None, None),
2197            sequence_start: 0,
2198            sequence_end: 0,
2199            path_start: 10,
2200            path_end: 12,
2201            strand: Strand::Forward,
2202        };
2203        let change = PathChange {
2204            block_group_id,
2205            path: path.clone(),
2206            path_accession: None,
2207            start: 10,
2208            end: 12,
2209            block: deletion,
2210            chromosome_index: 1,
2211            phased: 0,
2212            preserve_edge: true,
2213        };
2214        let tree = path.intervaltree(&conn);
2215        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2216
2217        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2218        assert_eq!(
2219            all_sequences,
2220            HashSet::from_iter(vec![
2221                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2222                "AAAAAAAAAATTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2223            ])
2224        );
2225    }
2226
2227    #[test]
2228    fn deletion_ending_at_block_boundary() {
2229        let conn = get_connection(None).unwrap();
2230        let (block_group_id, path) = setup_block_group(&conn);
2231        let deletion_sequence = Sequence::new()
2232            .sequence_type("DNA")
2233            .sequence("")
2234            .save(&conn);
2235        let deletion_node_id =
2236            Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("1"));
2237        let deletion = PathBlock {
2238            id: 0,
2239            node_id: deletion_node_id,
2240            block_sequence: deletion_sequence.get_sequence(None, None),
2241            sequence_start: 0,
2242            sequence_end: 0,
2243            path_start: 18,
2244            path_end: 20,
2245            strand: Strand::Forward,
2246        };
2247        let change = PathChange {
2248            block_group_id,
2249            path: path.clone(),
2250            path_accession: None,
2251            start: 18,
2252            end: 20,
2253            block: deletion,
2254            chromosome_index: 1,
2255            phased: 0,
2256            preserve_edge: true,
2257        };
2258        let tree = path.intervaltree(&conn);
2259        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2260
2261        let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2262        assert_eq!(
2263            all_sequences,
2264            HashSet::from_iter(vec![
2265                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2266                "AAAAAAAAAATTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2267            ])
2268        );
2269    }
2270
2271    #[test]
2272    fn test_blockgroup_interval_tree() {
2273        let conn = &get_connection(None).unwrap();
2274        let (block_group_id, path) = setup_block_group(conn);
2275        let _new_sample = Sample::get_or_create(conn, "child");
2276        let new_bg_id =
2277            BlockGroup::get_or_create_sample_block_group(conn, "test", "child", "chr1", None)
2278                .unwrap();
2279        let new_path = Path::query(
2280            conn,
2281            "select * from paths where block_group_id = ?1",
2282            params![new_bg_id],
2283        );
2284        let insert_sequence = Sequence::new()
2285            .sequence_type("DNA")
2286            .sequence("NNNN")
2287            .save(conn);
2288        let insert_node_id = Node::create(
2289            conn,
2290            &insert_sequence.hash,
2291            &HashId::convert_str("insert-node"),
2292        );
2293        let insert = PathBlock {
2294            id: 0,
2295            node_id: insert_node_id,
2296            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2297            sequence_start: 0,
2298            sequence_end: 4,
2299            path_start: 7,
2300            path_end: 15,
2301            strand: Strand::Forward,
2302        };
2303        let change = PathChange {
2304            block_group_id: new_bg_id,
2305            path: new_path[0].clone(),
2306            path_accession: None,
2307            start: 7,
2308            end: 15,
2309            block: insert,
2310            chromosome_index: 1,
2311            phased: 0,
2312            preserve_edge: true,
2313        };
2314        let tree = path.intervaltree(conn);
2315        BlockGroup::insert_change(conn, &change, &tree).unwrap();
2316
2317        let tree = BlockGroup::intervaltree_for(conn, &block_group_id, false);
2318        let tree2 = BlockGroup::intervaltree_for(conn, &block_group_id, true);
2319        interval_tree_verify(
2320            &tree,
2321            3,
2322            &[NodeIntervalBlock {
2323                block_id: 3,
2324                node_id: HashId::convert_str("test-a-node"),
2325                start: 0,
2326                end: 10,
2327                sequence_start: 0,
2328                sequence_end: 10,
2329                strand: Strand::Forward,
2330            }],
2331        );
2332        interval_tree_verify(
2333            &tree2,
2334            3,
2335            &[NodeIntervalBlock {
2336                block_id: 3,
2337                node_id: HashId::convert_str("test-a-node"),
2338                start: 0,
2339                end: 10,
2340                sequence_start: 0,
2341                sequence_end: 10,
2342                strand: Strand::Forward,
2343            }],
2344        );
2345        interval_tree_verify(
2346            &tree,
2347            35,
2348            &[NodeIntervalBlock {
2349                block_id: 0,
2350                node_id: HashId::convert_str("test-g-node"),
2351                start: 30,
2352                end: 40,
2353                sequence_start: 0,
2354                sequence_end: 10,
2355                strand: Strand::Forward,
2356            }],
2357        );
2358        interval_tree_verify(
2359            &tree2,
2360            35,
2361            &[NodeIntervalBlock {
2362                block_id: 0,
2363                node_id: HashId::convert_str("test-g-node"),
2364                start: 30,
2365                end: 40,
2366                sequence_start: 0,
2367                sequence_end: 10,
2368                strand: Strand::Forward,
2369            }],
2370        );
2371
2372        // This blockgroup has a change from positions 7-15 of 4 base pairs -- so any changes after this will be ambiguous
2373        let tree = BlockGroup::intervaltree_for(conn, &new_bg_id, false);
2374        let tree2 = BlockGroup::intervaltree_for(conn, &new_bg_id, true);
2375        interval_tree_verify(
2376            &tree,
2377            3,
2378            &[NodeIntervalBlock {
2379                block_id: 5,
2380                node_id: HashId::convert_str("test-a-node"),
2381                start: 0,
2382                end: 7,
2383                sequence_start: 0,
2384                sequence_end: 7,
2385                strand: Strand::Forward,
2386            }],
2387        );
2388        interval_tree_verify(
2389            &tree2,
2390            3,
2391            &[NodeIntervalBlock {
2392                block_id: 5,
2393                node_id: HashId::convert_str("test-a-node"),
2394                start: 0,
2395                end: 7,
2396                sequence_start: 0,
2397                sequence_end: 7,
2398                strand: Strand::Forward,
2399            }],
2400        );
2401        interval_tree_verify(
2402            &tree,
2403            30,
2404            &[
2405                NodeIntervalBlock {
2406                    block_id: 0,
2407                    node_id: HashId::convert_str("test-g-node"),
2408                    start: 26,
2409                    end: 36,
2410                    sequence_start: 0,
2411                    sequence_end: 10,
2412                    strand: Strand::Forward,
2413                },
2414                NodeIntervalBlock {
2415                    block_id: 0,
2416                    node_id: HashId::convert_str("test-g-node"),
2417                    start: 30,
2418                    end: 40,
2419                    sequence_start: 0,
2420                    sequence_end: 10,
2421                    strand: Strand::Forward,
2422                },
2423            ],
2424        );
2425        interval_tree_verify(&tree2, 30, &[]);
2426        // TODO: This case should return [] because there are 2 distinct nodes at this position and thus it is ambiguous.
2427        // currently, the caller needs to filter these out.
2428        interval_tree_verify(
2429            &tree2,
2430            9,
2431            &[
2432                NodeIntervalBlock {
2433                    block_id: 4,
2434                    node_id: HashId::convert_str("insert-node"),
2435                    start: 7,
2436                    end: 11,
2437                    sequence_start: 0,
2438                    sequence_end: 4,
2439                    strand: Strand::Forward,
2440                },
2441                NodeIntervalBlock {
2442                    block_id: 6,
2443                    node_id: HashId::convert_str("test-a-node"),
2444                    start: 7,
2445                    end: 10,
2446                    sequence_start: 7,
2447                    sequence_end: 10,
2448                    strand: Strand::Forward,
2449                },
2450            ],
2451        );
2452    }
2453
2454    #[test]
2455    fn test_changes_against_derivative_blockgroups() {
2456        let conn = &get_connection(None).unwrap();
2457        let (_block_group_id, _path) = setup_block_group(conn);
2458        let _new_sample = Sample::get_or_create(conn, "child");
2459        let new_bg_id =
2460            BlockGroup::get_or_create_sample_block_group(conn, "test", "child", "chr1", None)
2461                .unwrap();
2462        let new_path = Path::query(
2463            conn,
2464            "select * from paths where block_group_id = ?1",
2465            rusqlite::params!(SQLValue::from(new_bg_id)),
2466        );
2467        let insert_sequence = Sequence::new()
2468            .sequence_type("DNA")
2469            .sequence("NNNN")
2470            .save(conn);
2471        let insert_node_id = Node::create(conn, &insert_sequence.hash, &HashId::convert_str("1"));
2472        let insert = PathBlock {
2473            id: 0,
2474            node_id: insert_node_id,
2475            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2476            sequence_start: 0,
2477            sequence_end: 4,
2478            path_start: 7,
2479            path_end: 15,
2480            strand: Strand::Forward,
2481        };
2482        let change = PathChange {
2483            block_group_id: new_bg_id,
2484            path: new_path[0].clone(),
2485            path_accession: None,
2486            start: 7,
2487            end: 15,
2488            block: insert,
2489            chromosome_index: 1,
2490            phased: 0,
2491            preserve_edge: false,
2492        };
2493        // note we are making our change against the new blockgroup, and not the parent blockgroup
2494        let tree = BlockGroup::intervaltree_for(conn, &new_bg_id, true);
2495        BlockGroup::insert_change(conn, &change, &tree).unwrap();
2496        let all_sequences = BlockGroup::get_all_sequences(conn, &new_bg_id, true);
2497        assert_eq!(
2498            all_sequences,
2499            HashSet::from_iter(vec!["AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),])
2500        );
2501
2502        // Now, we make a change against another descendant
2503        let _new_sample = Sample::get_or_create(conn, "grandchild");
2504        let gc_bg_id = BlockGroup::get_or_create_sample_block_group(
2505            conn,
2506            "test",
2507            "grandchild",
2508            "chr1",
2509            Some("child"),
2510        )
2511        .unwrap();
2512        let new_path = Path::query(
2513            conn,
2514            "select * from paths where block_group_id = ?1",
2515            rusqlite::params!(SQLValue::from(gc_bg_id)),
2516        );
2517
2518        let insert = PathBlock {
2519            id: 0,
2520            node_id: insert_node_id,
2521            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2522            sequence_start: 0,
2523            sequence_end: 4,
2524            path_start: 7,
2525            path_end: 15,
2526            strand: Strand::Forward,
2527        };
2528        let change = PathChange {
2529            block_group_id: gc_bg_id,
2530            path: new_path[0].clone(),
2531            path_accession: None,
2532            start: 7,
2533            end: 15,
2534            block: insert,
2535            chromosome_index: 1,
2536            phased: 0,
2537            preserve_edge: false,
2538        };
2539        // take out an entire block.
2540        let tree = BlockGroup::intervaltree_for(conn, &gc_bg_id, true);
2541        BlockGroup::insert_change(conn, &change, &tree).unwrap();
2542        let all_sequences = BlockGroup::get_all_sequences(conn, &gc_bg_id, true);
2543        assert_eq!(
2544            all_sequences,
2545            HashSet::from_iter(vec!["AAAAAAANNNNTCCCCCCCCCCGGGGGGGGGG".to_string(),])
2546        )
2547    }
2548
2549    #[test]
2550    fn test_changes_against_derivative_diploid_blockgroups() {
2551        // This test ensures that if we have heterozygous changes that do not introduce frameshifts,
2552        // we can modify regions downstream of them.
2553        let conn = &get_connection(None).unwrap();
2554        let (_block_group_id, _path) = setup_block_group(conn);
2555        let _new_sample = Sample::get_or_create(conn, "child");
2556        let new_bg_id =
2557            BlockGroup::get_or_create_sample_block_group(conn, "test", "child", "chr1", None)
2558                .unwrap();
2559        let new_path = Path::query(
2560            conn,
2561            "select * from paths where block_group_id = ?1",
2562            params![new_bg_id],
2563        );
2564        let insert_sequence = Sequence::new()
2565            .sequence_type("DNA")
2566            .sequence("NNNN")
2567            .save(conn);
2568        let insert_node_id = Node::create(conn, &insert_sequence.hash, &HashId::convert_str("1"));
2569        let insert = PathBlock {
2570            id: 0,
2571            node_id: insert_node_id,
2572            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2573            sequence_start: 0,
2574            sequence_end: 4,
2575            path_start: 7,
2576            path_end: 11,
2577            strand: Strand::Forward,
2578        };
2579        let change = PathChange {
2580            block_group_id: new_bg_id,
2581            path: new_path[0].clone(),
2582            path_accession: None,
2583            start: 7,
2584            end: 11,
2585            block: insert,
2586            chromosome_index: 1,
2587            phased: 0,
2588            preserve_edge: true,
2589        };
2590        // note we are making our change against the new blockgroup, and not the parent blockgroup
2591        let tree = BlockGroup::intervaltree_for(conn, &new_bg_id, true);
2592        BlockGroup::insert_change(conn, &change, &tree).unwrap();
2593        let all_sequences = BlockGroup::get_all_sequences(conn, &new_bg_id, true);
2594        assert_eq!(
2595            all_sequences,
2596            HashSet::from_iter(vec![
2597                "AAAAAAANNNNTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2598                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2599            ])
2600        );
2601
2602        // Now, we make a change against another descendant
2603        let _new_sample = Sample::get_or_create(conn, "grandchild");
2604        let gc_bg_id = BlockGroup::get_or_create_sample_block_group(
2605            conn,
2606            "test",
2607            "grandchild",
2608            "chr1",
2609            Some("child"),
2610        )
2611        .unwrap();
2612        let new_path = Path::query(
2613            conn,
2614            "select * from paths where block_group_id = ?1",
2615            params![gc_bg_id],
2616        );
2617
2618        let insert_sequence = Sequence::new()
2619            .sequence_type("DNA")
2620            .sequence("NNNN")
2621            .save(conn);
2622        let insert_node_id = Node::create(
2623            conn,
2624            &insert_sequence.hash,
2625            &HashId::convert_str("new-hash"),
2626        );
2627
2628        let insert = PathBlock {
2629            id: 0,
2630            node_id: insert_node_id,
2631            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2632            sequence_start: 0,
2633            sequence_end: 4,
2634            path_start: 20,
2635            path_end: 24,
2636            strand: Strand::Forward,
2637        };
2638        let change = PathChange {
2639            block_group_id: gc_bg_id,
2640            path: new_path[0].clone(),
2641            path_accession: None,
2642            start: 20,
2643            end: 24,
2644            block: insert,
2645            chromosome_index: 1,
2646            phased: 0,
2647            preserve_edge: true,
2648        };
2649        // take out an entire block.
2650        let tree = BlockGroup::intervaltree_for(conn, &gc_bg_id, true);
2651        BlockGroup::insert_change(conn, &change, &tree).unwrap();
2652        let all_sequences = BlockGroup::get_all_sequences(conn, &gc_bg_id, true);
2653        assert_eq!(
2654            all_sequences,
2655            HashSet::from_iter(vec![
2656                "AAAAAAANNNNTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2657                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2658                "AAAAAAAAAATTTTTTTTTTNNNNCCCCCCGGGGGGGGGG".to_string(),
2659                "AAAAAAANNNNTTTTTTTTTNNNNCCCCCCGGGGGGGGGG".to_string()
2660            ])
2661        )
2662    }
2663
2664    #[test]
2665    #[should_panic]
2666    fn test_prohibits_out_of_frame_changes_against_derivative_diploid_blockgroups() {
2667        // This test ensures that we do not allow ambiguous changes by coordinates
2668        let conn = &get_connection(None).unwrap();
2669        let (_block_group_id, _path) = setup_block_group(conn);
2670        let _new_sample = Sample::get_or_create(conn, "child");
2671        let new_bg_id =
2672            BlockGroup::get_or_create_sample_block_group(conn, "test", "child", "chr1", None)
2673                .unwrap();
2674        let new_path = Path::query(
2675            conn,
2676            "select * from paths where block_group_id = ?1",
2677            rusqlite::params!(SQLValue::from(new_bg_id)),
2678        );
2679        // This is a heterozygous replacement of 5 bases with 4 bases, so positions
2680        // downstream of this are not addressable.
2681        let insert_sequence = Sequence::new()
2682            .sequence_type("DNA")
2683            .sequence("NNNN")
2684            .save(conn);
2685        let insert_node_id = Node::create(conn, &insert_sequence.hash, &HashId::convert_str("1"));
2686        let insert = PathBlock {
2687            id: 0,
2688            node_id: insert_node_id,
2689            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2690            sequence_start: 0,
2691            sequence_end: 4,
2692            path_start: 7,
2693            path_end: 12,
2694            strand: Strand::Forward,
2695        };
2696        let change = PathChange {
2697            block_group_id: new_bg_id,
2698            path: new_path[0].clone(),
2699            path_accession: None,
2700            start: 7,
2701            end: 12,
2702            block: insert,
2703            chromosome_index: 1,
2704            phased: 0,
2705            preserve_edge: true,
2706        };
2707        // note we are making our change against the new blockgroup, and not the parent blockgroup
2708        let tree = BlockGroup::intervaltree_for(conn, &new_bg_id, true);
2709        BlockGroup::insert_change(conn, &change, &tree).unwrap();
2710        let all_sequences = BlockGroup::get_all_sequences(conn, &new_bg_id, true);
2711        assert_eq!(
2712            all_sequences,
2713            HashSet::from_iter(vec![
2714                "AAAAAAANNNNTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2715                "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2716            ])
2717        );
2718
2719        // Now, we make a change against another descendant and get an error
2720        let _new_sample = Sample::get_or_create(conn, "grandchild");
2721        let gc_bg_id = BlockGroup::get_or_create_sample_block_group(
2722            conn,
2723            "test",
2724            "grandchild",
2725            "chr1",
2726            Some("child"),
2727        )
2728        .unwrap();
2729        let new_path = Path::query(
2730            conn,
2731            "select * from paths where block_group_id = ?1",
2732            rusqlite::params!(SQLValue::from(gc_bg_id)),
2733        );
2734
2735        let insert_sequence = Sequence::new()
2736            .sequence_type("DNA")
2737            .sequence("NNNN")
2738            .save(conn);
2739        let insert_node_id =
2740            Node::create(conn, &insert_sequence.hash, &HashId::pad_str("new-hash"));
2741
2742        let insert = PathBlock {
2743            id: 0,
2744            node_id: insert_node_id,
2745            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2746            sequence_start: 0,
2747            sequence_end: 4,
2748            path_start: 20,
2749            path_end: 24,
2750            strand: Strand::Forward,
2751        };
2752        let change = PathChange {
2753            block_group_id: gc_bg_id,
2754            path: new_path[0].clone(),
2755            path_accession: None,
2756            start: 20,
2757            end: 24,
2758            block: insert,
2759            chromosome_index: 1,
2760            phased: 0,
2761            preserve_edge: true,
2762        };
2763        // take out an entire block.
2764        let tree = BlockGroup::intervaltree_for(conn, &gc_bg_id, true);
2765        BlockGroup::insert_change(conn, &change, &tree).unwrap();
2766    }
2767
2768    mod test_derive_subgraph {
2769
2770        use super::*;
2771        use crate::{
2772            collection::Collection,
2773            node::Node,
2774            sequence::Sequence,
2775            test_helpers::{get_connection, setup_block_group},
2776        };
2777
2778        #[test]
2779        fn test_derive_subgraph_one_insertion() {
2780            /*
2781            AAAAAAAAAA -> TTTTTTTTTT -> CCCCCCCCCC -> GGGGGGGGGG
2782                              \-> AAAAAAAA ->/
2783            Subgraph range:  |-----------------|
2784            Sequences of the subgraph are TAAAAAAAAC, TTTTTCCCCC
2785             */
2786            let conn = &get_connection(None).unwrap();
2787            Collection::create(conn, "test");
2788            let (block_group1_id, original_path) = setup_block_group(conn);
2789
2790            let intervaltree = original_path.intervaltree(conn);
2791            let insert_start_node_id = intervaltree.query_point(16).next().unwrap().value.node_id;
2792            let insert_end_node_id = intervaltree.query_point(24).next().unwrap().value.node_id;
2793
2794            let insert_sequence = Sequence::new()
2795                .sequence_type("DNA")
2796                .sequence("AAAAAAAA")
2797                .save(conn);
2798            let insert_node_id = Node::create(
2799                conn,
2800                &insert_sequence.hash,
2801                &HashId(calculate_hash(&format!(
2802                    "test-insert-a-node.{}",
2803                    insert_sequence.hash
2804                ))),
2805            );
2806            let edge_into_insert = Edge::create(
2807                conn,
2808                insert_start_node_id,
2809                6,
2810                Strand::Forward,
2811                insert_node_id,
2812                0,
2813                Strand::Forward,
2814            );
2815            let edge_out_of_insert = Edge::create(
2816                conn,
2817                insert_node_id,
2818                8,
2819                Strand::Forward,
2820                insert_end_node_id,
2821                4,
2822                Strand::Forward,
2823            );
2824            let ref_heal_1 = Edge::create(
2825                conn,
2826                insert_start_node_id,
2827                6,
2828                Strand::Forward,
2829                insert_start_node_id,
2830                6,
2831                Strand::Forward,
2832            );
2833            let ref_heal_2 = Edge::create(
2834                conn,
2835                insert_end_node_id,
2836                4,
2837                Strand::Forward,
2838                insert_end_node_id,
2839                4,
2840                Strand::Forward,
2841            );
2842
2843            let edge_ids = [
2844                &edge_into_insert.id,
2845                &edge_out_of_insert.id,
2846                &ref_heal_1.id,
2847                &ref_heal_2.id,
2848            ];
2849            let block_group_edges = edge_ids
2850                .iter()
2851                .enumerate()
2852                .map(|(i, edge_id)| BlockGroupEdgeData {
2853                    block_group_id: block_group1_id,
2854                    edge_id: *(*edge_id),
2855                    chromosome_index: if i < 2 { 1 } else { 0 },
2856                    phased: 0,
2857                })
2858                .collect::<Vec<BlockGroupEdgeData>>();
2859            BlockGroupEdge::bulk_create(conn, &block_group_edges);
2860
2861            let insert_path =
2862                original_path.new_path_with(conn, 16, 24, &edge_into_insert, &edge_out_of_insert);
2863            assert_eq!(
2864                insert_path.sequence(conn),
2865                "AAAAAAAAAATTTTTTAAAAAAAACCCCCCGGGGGGGGGG"
2866            );
2867
2868            let all_sequences = BlockGroup::get_all_sequences(conn, &block_group1_id, false);
2869            assert_eq!(
2870                all_sequences,
2871                HashSet::from_iter(vec![
2872                    "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2873                    "AAAAAAAAAATTTTTTAAAAAAAACCCCCCGGGGGGGGGG".to_string(),
2874                ])
2875            );
2876
2877            let mut blocks = intervaltree
2878                .query(Range { start: 15, end: 25 })
2879                .map(|x| x.value)
2880                .collect::<Vec<_>>();
2881            blocks.sort_by(|a, b| a.start.cmp(&b.start));
2882            let start_block = blocks[0];
2883            let start_node_coordinate = 15 - start_block.start + start_block.sequence_start;
2884            let end_block = blocks[blocks.len() - 1];
2885            let end_node_coordinate = 25 - end_block.start + end_block.sequence_start;
2886
2887            let block_group2 = BlockGroup::create(conn, "test", None, "chr1.1");
2888            BlockGroup::derive_subgraph(
2889                conn,
2890                "test",
2891                "test",
2892                &block_group1_id,
2893                &start_block,
2894                &end_block,
2895                start_node_coordinate,
2896                end_node_coordinate,
2897                &block_group2.id,
2898            );
2899            let all_sequences2 = BlockGroup::get_all_sequences(conn, &block_group2.id, false);
2900            assert_eq!(
2901                all_sequences2,
2902                HashSet::from_iter(vec!["TTTTTCCCCC".to_string(), "TAAAAAAAAC".to_string(),])
2903            );
2904        }
2905
2906        #[test]
2907        fn test_derive_subgraph_two_independent_insertions() {
2908            /*
2909            AAAAAAAAAA -> TTTTTTTTTT -> CCCCCCCCCC -----> GGGGGGGGGG
2910                                  \-> AAAAAAAA ->/  \->TTTTTTTT -/
2911            Subgraph range:     |----------------------------------|
2912             */
2913            let conn = &get_connection(None).unwrap();
2914            Collection::create(conn, "test");
2915            let (block_group1_id, original_path) = setup_block_group(conn);
2916
2917            let intervaltree = original_path.intervaltree(conn);
2918            let insert_start_node_id = intervaltree.query_point(16).next().unwrap().value.node_id;
2919            let insert_end_node_id = intervaltree.query_point(24).next().unwrap().value.node_id;
2920
2921            let insert_sequence = Sequence::new()
2922                .sequence_type("DNA")
2923                .sequence("AAAAAAAA")
2924                .save(conn);
2925            let insert_node_id = Node::create(
2926                conn,
2927                &insert_sequence.hash,
2928                &HashId(calculate_hash(&format!(
2929                    "test-insert-a-node.{}",
2930                    insert_sequence.hash
2931                ))),
2932            );
2933            let edge_into_insert = Edge::create(
2934                conn,
2935                insert_start_node_id,
2936                6,
2937                Strand::Forward,
2938                insert_node_id,
2939                0,
2940                Strand::Forward,
2941            );
2942            let edge_out_of_insert = Edge::create(
2943                conn,
2944                insert_node_id,
2945                8,
2946                Strand::Forward,
2947                insert_end_node_id,
2948                4,
2949                Strand::Forward,
2950            );
2951            let ref_heal_1 = Edge::create(
2952                conn,
2953                insert_start_node_id,
2954                6,
2955                Strand::Forward,
2956                insert_start_node_id,
2957                6,
2958                Strand::Forward,
2959            );
2960            let ref_heal_2 = Edge::create(
2961                conn,
2962                insert_end_node_id,
2963                4,
2964                Strand::Forward,
2965                insert_end_node_id,
2966                4,
2967                Strand::Forward,
2968            );
2969
2970            let edge_ids = [
2971                &edge_into_insert.id,
2972                &edge_out_of_insert.id,
2973                &ref_heal_1.id,
2974                &ref_heal_2.id,
2975            ];
2976            let block_group_edges = edge_ids
2977                .iter()
2978                .enumerate()
2979                .map(|(i, edge_id)| BlockGroupEdgeData {
2980                    block_group_id: block_group1_id,
2981                    edge_id: *(*edge_id),
2982                    chromosome_index: if i < 2 { 1 } else { 0 },
2983                    phased: 0,
2984                })
2985                .collect::<Vec<BlockGroupEdgeData>>();
2986            BlockGroupEdge::bulk_create(conn, &block_group_edges);
2987
2988            let insert_path =
2989                original_path.new_path_with(conn, 16, 24, &edge_into_insert, &edge_out_of_insert);
2990            assert_eq!(
2991                insert_path.sequence(conn),
2992                "AAAAAAAAAATTTTTTAAAAAAAACCCCCCGGGGGGGGGG"
2993            );
2994
2995            let insert2_start_node_id = intervaltree.query_point(28).next().unwrap().value.node_id;
2996            let insert2_end_node_id = intervaltree.query_point(32).next().unwrap().value.node_id;
2997
2998            let insert2_sequence = Sequence::new()
2999                .sequence_type("DNA")
3000                .sequence("TTTTTTTT")
3001                .save(conn);
3002            let insert2_node_id = Node::create(
3003                conn,
3004                &insert2_sequence.hash,
3005                &HashId(calculate_hash(&format!(
3006                    "test-insert-t-node.{}",
3007                    insert2_sequence.hash
3008                ))),
3009            );
3010            let edge_into_insert2 = Edge::create(
3011                conn,
3012                insert2_start_node_id,
3013                6,
3014                Strand::Forward,
3015                insert2_node_id,
3016                0,
3017                Strand::Forward,
3018            );
3019            let edge_out_of_insert2 = Edge::create(
3020                conn,
3021                insert2_node_id,
3022                8,
3023                Strand::Forward,
3024                insert2_end_node_id,
3025                4,
3026                Strand::Forward,
3027            );
3028            let ref_heal_1 = Edge::create(
3029                conn,
3030                insert2_start_node_id,
3031                6,
3032                Strand::Forward,
3033                insert2_start_node_id,
3034                6,
3035                Strand::Forward,
3036            );
3037            let ref_heal_2 = Edge::create(
3038                conn,
3039                insert2_end_node_id,
3040                4,
3041                Strand::Forward,
3042                insert2_end_node_id,
3043                4,
3044                Strand::Forward,
3045            );
3046
3047            let edge_ids = [
3048                &edge_into_insert2.id,
3049                &edge_out_of_insert2.id,
3050                &ref_heal_1.id,
3051                &ref_heal_2.id,
3052            ];
3053            let block_group_edges = edge_ids
3054                .iter()
3055                .enumerate()
3056                .map(|(i, edge_id)| BlockGroupEdgeData {
3057                    block_group_id: block_group1_id,
3058                    edge_id: *(*edge_id),
3059                    chromosome_index: if i < 2 { 1 } else { 0 },
3060                    phased: 0,
3061                })
3062                .collect::<Vec<BlockGroupEdgeData>>();
3063            BlockGroupEdge::bulk_create(conn, &block_group_edges);
3064
3065            let insert2_path =
3066                insert_path.new_path_with(conn, 28, 32, &edge_into_insert2, &edge_out_of_insert2);
3067            assert_eq!(
3068                insert2_path.sequence(conn),
3069                "AAAAAAAAAATTTTTTAAAAAAAACCTTTTTTTTGGGGGG"
3070            );
3071
3072            let all_sequences = BlockGroup::get_all_sequences(conn, &block_group1_id, false);
3073            assert_eq!(
3074                all_sequences,
3075                HashSet::from_iter(vec![
3076                    "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
3077                    "AAAAAAAAAATTTTTTAAAAAAAACCCCCCGGGGGGGGGG".to_string(),
3078                    "AAAAAAAAAATTTTTTTTTTCCCCCCTTTTTTTTGGGGGG".to_string(),
3079                    "AAAAAAAAAATTTTTTAAAAAAAACCTTTTTTTTGGGGGG".to_string(),
3080                ])
3081            );
3082
3083            let mut blocks = intervaltree
3084                .query(Range { start: 15, end: 36 })
3085                .map(|x| x.value)
3086                .collect::<Vec<_>>();
3087            blocks.sort_by(|a, b| a.start.cmp(&b.start));
3088            let start_block = blocks[0];
3089            let start_node_coordinate = 15 - start_block.start + start_block.sequence_start;
3090            let end_block = blocks[blocks.len() - 1];
3091            let end_node_coordinate = 36 - end_block.start + end_block.sequence_start;
3092
3093            let block_group2 = BlockGroup::create(conn, "test", None, "chr1.1");
3094            BlockGroup::derive_subgraph(
3095                conn,
3096                "test",
3097                "test",
3098                &block_group1_id,
3099                &start_block,
3100                &end_block,
3101                start_node_coordinate,
3102                end_node_coordinate,
3103                &block_group2.id,
3104            );
3105            let all_sequences2 = BlockGroup::get_all_sequences(conn, &block_group2.id, false);
3106            assert_eq!(
3107                all_sequences2,
3108                HashSet::from_iter(vec![
3109                    "TTTTTCCCCCCCCCCGGGGGG".to_string(),
3110                    "TAAAAAAAACCCCCCGGGGGG".to_string(),
3111                    "TTTTTCCCCCCTTTTTTTTGG".to_string(),
3112                    "TAAAAAAAACCTTTTTTTTGG".to_string(),
3113                ])
3114            );
3115        }
3116
3117        #[test]
3118        fn test_derive_subgraph_two_independent_insertions_and_one_deletion() {
3119            /*
3120                       /--------------------------------------------\  (<-- Deletion edge)
3121            AAAAAAAAAA -> TTTTTTTTTT -> CCCCCCCCCC -----> GGGGGGGGGG
3122                                  \-> AAAAAAAA ->/  \->TTTTTTTT -/
3123            Subgraph range: |----------------------------------|
3124
3125            Confirms that deletion edge is ignored and not added to subgraph
3126             */
3127            let conn = &get_connection(None).unwrap();
3128            Collection::create(conn, "test");
3129            let (block_group1_id, original_path) = setup_block_group(conn);
3130
3131            let intervaltree = original_path.intervaltree(conn);
3132            let insert_start_node_id = intervaltree.query_point(16).next().unwrap().value.node_id;
3133            let insert_end_node_id = intervaltree.query_point(24).next().unwrap().value.node_id;
3134
3135            let insert_sequence = Sequence::new()
3136                .sequence_type("DNA")
3137                .sequence("AAAAAAAA")
3138                .save(conn);
3139            let insert_node_id = Node::create(
3140                conn,
3141                &insert_sequence.hash,
3142                &HashId(calculate_hash(&format!(
3143                    "test-insert-a-node.{}",
3144                    insert_sequence.hash
3145                ))),
3146            );
3147            let edge_into_insert = Edge::create(
3148                conn,
3149                insert_start_node_id,
3150                6,
3151                Strand::Forward,
3152                insert_node_id,
3153                0,
3154                Strand::Forward,
3155            );
3156            let edge_out_of_insert = Edge::create(
3157                conn,
3158                insert_node_id,
3159                8,
3160                Strand::Forward,
3161                insert_end_node_id,
3162                4,
3163                Strand::Forward,
3164            );
3165            let ref_heal_1 = Edge::create(
3166                conn,
3167                insert_start_node_id,
3168                6,
3169                Strand::Forward,
3170                insert_start_node_id,
3171                6,
3172                Strand::Forward,
3173            );
3174            let ref_heal_2 = Edge::create(
3175                conn,
3176                insert_end_node_id,
3177                4,
3178                Strand::Forward,
3179                insert_end_node_id,
3180                4,
3181                Strand::Forward,
3182            );
3183
3184            let edge_ids = [
3185                &edge_into_insert.id,
3186                &edge_out_of_insert.id,
3187                &ref_heal_1.id,
3188                &ref_heal_2.id,
3189            ];
3190            let block_group_edges = edge_ids
3191                .iter()
3192                .map(|edge_id| BlockGroupEdgeData {
3193                    block_group_id: block_group1_id,
3194                    edge_id: *(*edge_id),
3195                    chromosome_index: NO_CHROMOSOME_INDEX,
3196                    phased: 0,
3197                })
3198                .collect::<Vec<BlockGroupEdgeData>>();
3199            BlockGroupEdge::bulk_create(conn, &block_group_edges);
3200
3201            let insert_path =
3202                original_path.new_path_with(conn, 16, 24, &edge_into_insert, &edge_out_of_insert);
3203            assert_eq!(
3204                insert_path.sequence(conn),
3205                "AAAAAAAAAATTTTTTAAAAAAAACCCCCCGGGGGGGGGG"
3206            );
3207
3208            let insert2_start_node_id = intervaltree.query_point(28).next().unwrap().value.node_id;
3209            let insert2_end_node_id = intervaltree.query_point(32).next().unwrap().value.node_id;
3210
3211            let insert2_sequence = Sequence::new()
3212                .sequence_type("DNA")
3213                .sequence("TTTTTTTT")
3214                .save(conn);
3215            let insert2_node_id = Node::create(
3216                conn,
3217                &insert2_sequence.hash,
3218                &HashId(calculate_hash(&format!(
3219                    "test-insert-t-node.{}",
3220                    insert2_sequence.hash
3221                ))),
3222            );
3223            let edge_into_insert2 = Edge::create(
3224                conn,
3225                insert2_start_node_id,
3226                6,
3227                Strand::Forward,
3228                insert2_node_id,
3229                0,
3230                Strand::Forward,
3231            );
3232            let edge_out_of_insert2 = Edge::create(
3233                conn,
3234                insert2_node_id,
3235                8,
3236                Strand::Forward,
3237                insert2_end_node_id,
3238                4,
3239                Strand::Forward,
3240            );
3241            let ref_heal_1 = Edge::create(
3242                conn,
3243                insert2_start_node_id,
3244                6,
3245                Strand::Forward,
3246                insert2_start_node_id,
3247                6,
3248                Strand::Forward,
3249            );
3250            let ref_heal_2 = Edge::create(
3251                conn,
3252                insert2_end_node_id,
3253                4,
3254                Strand::Forward,
3255                insert2_end_node_id,
3256                4,
3257                Strand::Forward,
3258            );
3259
3260            let edge_ids = [
3261                &edge_into_insert2.id,
3262                &edge_out_of_insert2.id,
3263                &ref_heal_1.id,
3264                &ref_heal_2.id,
3265            ];
3266            let block_group_edges = edge_ids
3267                .iter()
3268                .map(|edge_id| BlockGroupEdgeData {
3269                    block_group_id: block_group1_id,
3270                    edge_id: *(*edge_id),
3271                    chromosome_index: NO_CHROMOSOME_INDEX,
3272                    phased: 0,
3273                })
3274                .collect::<Vec<BlockGroupEdgeData>>();
3275            BlockGroupEdge::bulk_create(conn, &block_group_edges);
3276
3277            let insert2_path =
3278                insert_path.new_path_with(conn, 28, 32, &edge_into_insert2, &edge_out_of_insert2);
3279            assert_eq!(
3280                insert2_path.sequence(conn),
3281                "AAAAAAAAAATTTTTTAAAAAAAACCTTTTTTTTGGGGGG"
3282            );
3283
3284            let deletion_end_node_id = intervaltree.query_point(38).next().unwrap().value.node_id;
3285            let deletion_edge = Edge::create(
3286                conn,
3287                insert_node_id,
3288                8,
3289                Strand::Forward,
3290                deletion_end_node_id,
3291                8,
3292                Strand::Forward,
3293            );
3294            let ref_heal_1 = Edge::create(
3295                conn,
3296                insert_node_id,
3297                8,
3298                Strand::Forward,
3299                insert_node_id,
3300                8,
3301                Strand::Forward,
3302            );
3303            let ref_heal_2 = Edge::create(
3304                conn,
3305                deletion_end_node_id,
3306                8,
3307                Strand::Forward,
3308                deletion_end_node_id,
3309                8,
3310                Strand::Forward,
3311            );
3312            let block_group_edges = [
3313                BlockGroupEdgeData {
3314                    block_group_id: block_group1_id,
3315                    edge_id: deletion_edge.id,
3316                    chromosome_index: NO_CHROMOSOME_INDEX,
3317                    phased: 0,
3318                },
3319                BlockGroupEdgeData {
3320                    block_group_id: block_group1_id,
3321                    edge_id: ref_heal_1.id,
3322                    chromosome_index: NO_CHROMOSOME_INDEX,
3323                    phased: 0,
3324                },
3325                BlockGroupEdgeData {
3326                    block_group_id: block_group1_id,
3327                    edge_id: ref_heal_2.id,
3328                    chromosome_index: NO_CHROMOSOME_INDEX,
3329                    phased: 0,
3330                },
3331            ];
3332            BlockGroupEdge::bulk_create(conn, &block_group_edges);
3333
3334            let all_sequences = BlockGroup::get_all_sequences(conn, &block_group1_id, false);
3335            assert_eq!(
3336                all_sequences,
3337                HashSet::from_iter(vec![
3338                    "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
3339                    "AAAAAAAAAATTTTTTAAAAAAAACCCCCCGGGGGGGGGG".to_string(),
3340                    "AAAAAAAAAATTTTTTTTTTCCCCCCTTTTTTTTGGGGGG".to_string(),
3341                    "AAAAAAAAAATTTTTTAAAAAAAACCTTTTTTTTGGGGGG".to_string(),
3342                    "AAAAAAAAAATTTTTTAAAAAAAAGG".to_string(), // Sequence including deletion
3343                ])
3344            );
3345
3346            let mut blocks = intervaltree
3347                .query(Range { start: 15, end: 36 })
3348                .map(|x| x.value)
3349                .collect::<Vec<_>>();
3350            blocks.sort_by(|a, b| a.start.cmp(&b.start));
3351            let start_block = blocks[0];
3352            let start_node_coordinate = 15 - start_block.start + start_block.sequence_start;
3353            let end_block = blocks[blocks.len() - 1];
3354            let end_node_coordinate = 36 - end_block.start + end_block.sequence_start;
3355
3356            let block_group2 = BlockGroup::create(conn, "test", None, "chr1.1");
3357            BlockGroup::derive_subgraph(
3358                conn,
3359                "test",
3360                "test",
3361                &block_group1_id,
3362                &start_block,
3363                &end_block,
3364                start_node_coordinate,
3365                end_node_coordinate,
3366                &block_group2.id,
3367            );
3368            let all_sequences2 = BlockGroup::get_all_sequences(conn, &block_group2.id, false);
3369            assert_eq!(
3370                all_sequences2,
3371                // The deletion is not included in the cloned subgraph since one end of it is
3372                // outside the specified range
3373                HashSet::from_iter(vec![
3374                    "TTTTTCCCCCCCCCCGGGGGG".to_string(),
3375                    "TAAAAAAAACCCCCCGGGGGG".to_string(),
3376                    "TTTTTCCCCCCTTTTTTTTGG".to_string(),
3377                    "TAAAAAAAACCTTTTTTTTGG".to_string(),
3378                ])
3379            );
3380        }
3381    }
3382}