1use ahash::{HashMap, HashMapExt, HashSet, HashSetExt};
2use tracing::{debug, warn};
3
4use std::fs::File;
5use std::io::{self, BufRead, BufReader, Cursor, Read, Write};
6use std::path::Path;
7
8use anyhow::{Context, Result, anyhow};
9use bstr::{BStr, BString, ByteSlice};
10use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
11use thiserror::Error;
12use tsg_core::graph::TSGraph;
13use zstd::{decode_all, encode_all};
14
// Single-byte tags identifying each block type in a BTSG container.
// Every block on disk is laid out as: tag byte, little-endian u32
// payload length, then the payload (zstd-compressed for data blocks).
const BLOCK_HEADER: u8 = 0x01;
const BLOCK_GRAPH: u8 = 0x02;
const BLOCK_NODE: u8 = 0x03;
const BLOCK_EDGE: u8 = 0x04;
const BLOCK_ATTRIBUTE: u8 = 0x05;
const BLOCK_CHAIN: u8 = 0x06;
const BLOCK_PATH: u8 = 0x07;
const BLOCK_LINK: u8 = 0x08;
const BLOCK_DICTIONARY: u8 = 0x09;

// Format version written right after the `b"BTSG"` magic;
// `BTSGDecompressor::decompress` rejects files with any other version.
const BTSG_VERSION: u32 = 1;
28
/// Errors specific to reading and writing the BTSG binary container.
#[derive(Error, Debug)]
pub enum BTSGError {
    /// Underlying I/O failure (auto-converted via `?`).
    #[error("IO error: {0}")]
    Io(#[from] io::Error),

    /// zstd compression or decompression failed.
    #[error("Compression error: {0}")]
    Compression(String),

    /// A block carried a tag byte this reader does not recognize.
    #[error("Invalid block type: {0}")]
    InvalidBlockType(u8),

    /// The file preamble or a payload did not match the expected layout.
    #[error("Invalid data format: {0}")]
    InvalidFormat(String),

    /// A dictionary section could not be parsed.
    #[error("Dictionary error: {0}")]
    Dictionary(String),
}
46
47#[derive(Default)]
49struct StringDictionary {
50 str_to_id: HashMap<BString, u32>,
52 id_to_str: HashMap<u32, BString>,
54 next_id: u32,
56}
57
58impl StringDictionary {
59 fn new() -> Self {
60 Self::default()
61 }
62
63 fn add(&mut self, s: &BStr) -> u32 {
64 if let Some(&id) = self.str_to_id.get(s.as_bytes()) {
65 return id;
66 }
67
68 let id = self.next_id;
69 self.next_id += 1;
70
71 let s_owned = s.to_owned();
72 self.str_to_id.insert(s_owned.clone(), id);
73 self.id_to_str.insert(id, s_owned);
74
75 id
76 }
77
78 fn str(&self, id: u32) -> Option<&BStr> {
79 self.id_to_str.get(&id).map(|s| s.as_bstr())
80 }
81
82 fn id(&self, s: &BStr) -> Option<u32> {
83 self.str_to_id.get(s.as_bytes()).copied()
84 }
85
86 fn write<W: Write>(&self, writer: &mut W) -> Result<()> {
87 writer.write_u32::<LittleEndian>(self.id_to_str.len() as u32)?;
89
90 for (&id, string) in &self.id_to_str {
92 writer.write_u32::<LittleEndian>(id)?;
93 writer.write_u32::<LittleEndian>(string.len() as u32)?;
94 writer.write_all(string)?;
95 }
96
97 Ok(())
98 }
99
100 fn read<R: Read>(reader: &mut R) -> Result<Self> {
101 let mut dict = Self::new();
102
103 let count = reader.read_u32::<LittleEndian>()?;
105
106 for _ in 0..count {
108 let id = reader.read_u32::<LittleEndian>()?;
109 let len = reader.read_u32::<LittleEndian>()? as usize;
110
111 let mut bytes = vec![0u8; len];
112 reader.read_exact(&mut bytes)?;
113
114 let string = BString::from(bytes);
115 dict.str_to_id.insert(string.clone(), id);
116 dict.id_to_str.insert(id, string);
117
118 if id >= dict.next_id {
119 dict.next_id = id + 1;
120 }
121 }
122
123 Ok(dict)
124 }
125}
126
127struct Block {
129 block_type: u8,
130 data: Vec<u8>,
131}
132
133impl Block {
134 fn new(block_type: u8, data: Vec<u8>) -> Self {
135 Self { block_type, data }
136 }
137
138 fn write<W: Write>(&self, writer: &mut W) -> Result<()> {
139 writer.write_u8(self.block_type)?;
141
142 writer.write_u32::<LittleEndian>(self.data.len() as u32)?;
144
145 writer.write_all(&self.data)?;
147
148 Ok(())
149 }
150
151 fn read<R: Read>(reader: &mut R) -> Result<Self> {
152 let block_type = reader.read_u8()?;
154
155 let length = reader.read_u32::<LittleEndian>()? as usize;
157
158 let mut data = vec![0u8; length];
160 reader.read_exact(&mut data)?;
161
162 Ok(Self { block_type, data })
163 }
164}
165
/// Two-pass TSG -> BTSG compressor.
///
/// The first pass interns recurring identifiers into per-category string
/// dictionaries; the second pass groups records by graph and writes
/// zstd-compressed blocks.
#[derive(Default)]
pub struct BTSGCompressor {
    // Per-category identifier dictionaries, filled by
    // `build_dictionaries` and serialized into the dictionary block.
    node_dict: StringDictionary,
    edge_dict: StringDictionary,
    graph_dict: StringDictionary,
    read_dict: StringDictionary,
    chromosome_dict: StringDictionary,
    attribute_dict: StringDictionary,

    // zstd level handed to `encode_all` for every block.
    compression_level: i32,
}
180
181impl BTSGCompressor {
182 pub fn new(compression_level: i32) -> Self {
183 Self {
184 compression_level,
185 ..Default::default()
186 }
187 }
188
189 pub fn compress<P: AsRef<Path>>(&mut self, input_path: P, output_path: P) -> Result<()> {
190 self.build_dictionaries(input_path.as_ref())?;
192
193 let mut output_file = File::create(output_path)?;
195
196 output_file.write_all(b"BTSG")?;
198 output_file.write_u32::<LittleEndian>(BTSG_VERSION)?;
199
200 let dictionary_block = self.create_dictionary_block()?;
202 dictionary_block.write(&mut output_file)?;
203
204 let input_file = File::open(input_path)?;
206 let reader = BufReader::new(input_file);
207
208 let mut header_data = Vec::new();
210 let mut graphs: HashMap<BString, Vec<String>> = HashMap::new();
211 let mut current_graph: Option<BString> = None;
212
213 for line in reader.lines() {
214 let line = line?;
215 if line.trim().is_empty() || line.starts_with('#') {
216 continue;
217 }
218
219 let fields: Vec<&str> = line.split('\t').collect();
220 if fields.is_empty() {
221 continue;
222 }
223
224 match fields[0] {
225 "H" => {
226 header_data.push(line);
228 }
229 "G" => {
230 if fields.len() >= 2 {
232 let graph_id = BString::from(fields[1]);
233 current_graph = Some(graph_id.clone());
234 graphs.entry(graph_id).or_default().push(line);
235 }
236 }
237 "N" | "E" | "A" | "C" | "P" | "L" => {
238 if let Some(ref graph_id) = current_graph {
240 graphs.entry(graph_id.clone()).or_default().push(line);
241 } else {
242 let default_graph = BString::from("default");
244 current_graph = Some(default_graph.clone());
245 graphs.entry(default_graph).or_default().push(line);
246 }
247 }
248 _ => {
249 eprintln!("Warning: Unknown record type: {}", fields[0]);
251 }
252 }
253 }
254
255 if !header_data.is_empty() {
257 let header_block =
258 self.create_compressed_block(BLOCK_HEADER, header_data.join("\n"))?;
259 header_block.write(&mut output_file)?;
260 }
261
262 for (graph_id, graph_data) in graphs {
264 let graph_block = self.create_compressed_block(
266 BLOCK_GRAPH,
267 format!("G\t{}\n{}", graph_id, graph_data.join("\n")),
268 )?;
269 graph_block.write(&mut output_file)?;
270 }
271
272 Ok(())
273 }
274
275 fn build_dictionaries<P: AsRef<Path>>(&mut self, input_path: P) -> Result<()> {
276 let file = File::open(input_path)?;
277 let reader = BufReader::new(file);
278
279 let mut read_ids = HashSet::new();
280 let mut chromosomes = HashSet::new();
281
282 for line in reader.lines() {
283 let line = line?;
284 if line.trim().is_empty() || line.starts_with('#') {
285 continue;
286 }
287
288 let fields: Vec<&str> = line.split('\t').collect();
289 if fields.is_empty() {
290 continue;
291 }
292
293 match fields[0] {
294 "G" => {
295 if fields.len() >= 2 {
297 self.graph_dict.add(fields[1].as_bytes().as_bstr());
298 }
299 }
300 "N" => {
301 if fields.len() >= 4 {
303 self.node_dict.add(fields[1].as_bytes().as_bstr());
304
305 let genomic_loc = fields[2];
307 if let Some(chr_end) = genomic_loc.find(':') {
308 let chromosome = &genomic_loc[0..chr_end];
309 chromosomes.insert(chromosome.to_string());
310 }
311
312 let reads = fields[3];
314 for read_entry in reads.split(',') {
315 if let Some(colon_pos) = read_entry.find(':') {
316 let read_id = &read_entry[0..colon_pos];
317 read_ids.insert(read_id.to_string());
318 }
319 }
320 }
321 }
322 "E" => {
323 if fields.len() >= 4 {
325 self.edge_dict.add(fields[1].as_bytes().as_bstr());
326 self.node_dict.add(fields[2].as_bytes().as_bstr());
327 self.node_dict.add(fields[3].as_bytes().as_bstr());
328 }
329 }
330 "A" => {
331 if fields.len() >= 4 {
333 self.attribute_dict.add(fields[3].as_bytes().as_bstr());
334 }
335 }
336 _ => {}
337 }
338 }
339
340 for read_id in read_ids {
342 self.read_dict.add(read_id.as_bytes().as_bstr());
343 }
344
345 for chromosome in chromosomes {
346 self.chromosome_dict.add(chromosome.as_bytes().as_bstr());
347 }
348
349 Ok(())
350 }
351
352 fn create_dictionary_block(&self) -> Result<Block> {
353 let mut buffer = Vec::new();
354
355 buffer.write_u8(0x01)?; self.node_dict.write(&mut buffer)?;
358
359 buffer.write_u8(0x02)?; self.edge_dict.write(&mut buffer)?;
361
362 buffer.write_u8(0x03)?; self.graph_dict.write(&mut buffer)?;
364
365 buffer.write_u8(0x04)?; self.read_dict.write(&mut buffer)?;
367
368 buffer.write_u8(0x05)?; self.chromosome_dict.write(&mut buffer)?;
370
371 buffer.write_u8(0x06)?; self.attribute_dict.write(&mut buffer)?;
373
374 let compressed = encode_all(&buffer[..], self.compression_level)
376 .map_err(|e| BTSGError::Compression(e.to_string()))?;
377
378 Ok(Block::new(BLOCK_DICTIONARY, compressed))
379 }
380
381 fn create_compressed_block(&self, block_type: u8, data: String) -> Result<Block> {
382 if block_type == BLOCK_GRAPH {
384 let mut lines = data.lines();
387
388 if let Some(graph_line) = lines.next() {
390 let mut cleaned_data = String::from(graph_line);
392
393 for line in lines {
395 if !line.starts_with("G\t") {
396 cleaned_data.push('\n');
397 cleaned_data.push_str(line);
398 }
399 }
400
401 let compressed = encode_all(cleaned_data.as_bytes(), self.compression_level)
403 .map_err(|e| BTSGError::Compression(e.to_string()))?;
404
405 return Ok(Block::new(block_type, compressed));
406 }
407 }
408
409 let compressed = encode_all(data.as_bytes(), self.compression_level)
411 .map_err(|e| BTSGError::Compression(e.to_string()))?;
412
413 Ok(Block::new(block_type, compressed))
414 }
415}
416
/// BTSG -> TSG decompressor.
///
/// Holds the string dictionaries deserialized from the file's
/// dictionary block while its blocks are decoded back to TSG text.
#[derive(Default)]
pub struct BTSGDecompressor {
    // Per-category dictionaries, populated from the dictionary block by
    // `read_dictionaries`.
    node_dict: StringDictionary,
    edge_dict: StringDictionary,
    graph_dict: StringDictionary,
    read_dict: StringDictionary,
    chromosome_dict: StringDictionary,
    attribute_dict: StringDictionary,
}
428
429impl BTSGDecompressor {
430 pub fn new() -> Self {
431 Self::default()
432 }
433
434 pub fn decompress<P: AsRef<Path>>(&mut self, input_path: P, output_path: P) -> Result<()> {
435 let mut input_file = File::open(input_path)?;
436
437 let mut magic = [0u8; 4];
439 input_file.read_exact(&mut magic)?;
440 if &magic != b"BTSG" {
441 return Err(BTSGError::InvalidFormat("Not a valid BTSG file".to_string()).into());
442 }
443
444 let version = input_file.read_u32::<LittleEndian>()?;
446 if version != BTSG_VERSION {
447 return Err(
448 BTSGError::InvalidFormat(format!("Unsupported BTSG version: {}", version)).into(),
449 );
450 }
451
452 let mut output_file = File::create(output_path)?;
453
454 while let Ok(block) = Block::read(&mut input_file) {
456 match block.block_type {
457 BLOCK_DICTIONARY => {
458 self.read_dictionaries(&block.data)?;
460 }
461 BLOCK_HEADER => {
462 let decompressed = decode_all(&block.data[..])
464 .map_err(|e| BTSGError::Compression(e.to_string()))?;
465 output_file.write_all(&decompressed)?;
466 output_file.write_all(b"\n")?;
467 }
468 BLOCK_GRAPH => {
469 let decompressed = decode_all(&block.data[..])
471 .map_err(|e| BTSGError::Compression(e.to_string()))?;
472
473 let content = String::from_utf8_lossy(&decompressed);
475 let mut lines = content.lines();
476
477 if let Some(first_line) = lines.next() {
479 output_file.write_all(first_line.as_bytes())?;
481 output_file.write_all(b"\n")?;
482
483 for line in lines {
485 output_file.write_all(line.as_bytes())?;
486 output_file.write_all(b"\n")?;
487 }
488 }
489 }
490 _ => {
491 return Err(BTSGError::InvalidBlockType(block.block_type).into());
492 }
493 }
494 }
495
496 Ok(())
497 }
498
499 fn read_dictionaries(&mut self, data: &[u8]) -> Result<()> {
500 let decompressed = decode_all(data).map_err(|e| BTSGError::Compression(e.to_string()))?;
502 let mut cursor = io::Cursor::new(decompressed);
503
504 while let Ok(dict_type) = cursor.read_u8() {
506 match dict_type {
507 0x01 => {
508 self.node_dict = StringDictionary::read(&mut cursor)?;
510 }
511 0x02 => {
512 self.edge_dict = StringDictionary::read(&mut cursor)?;
514 }
515 0x03 => {
516 self.graph_dict = StringDictionary::read(&mut cursor)?;
518 }
519 0x04 => {
520 self.read_dict = StringDictionary::read(&mut cursor)?;
522 }
523 0x05 => {
524 self.chromosome_dict = StringDictionary::read(&mut cursor)?;
526 }
527 0x06 => {
528 self.attribute_dict = StringDictionary::read(&mut cursor)?;
530 }
531 _ => {
532 return Err(BTSGError::InvalidFormat(format!(
533 "Unknown dictionary type: {}",
534 dict_type
535 ))
536 .into());
537 }
538 }
539 }
540 Ok(())
541 }
542}
543
544impl BTSGDecompressor {
546 pub fn decompress_to_string<P: AsRef<Path>>(&mut self, input_path: P) -> Result<String> {
548 let mut input_file = File::open(input_path)?;
549
550 let mut magic = [0u8; 4];
552 input_file.read_exact(&mut magic)?;
553 if &magic != b"BTSG" {
554 return Err(BTSGError::InvalidFormat("Not a valid BTSG file".to_string()).into());
555 }
556
557 let version = input_file.read_u32::<LittleEndian>()?;
559 if version != BTSG_VERSION {
560 return Err(
561 BTSGError::InvalidFormat(format!("Unsupported BTSG version: {}", version)).into(),
562 );
563 }
564
565 let mut output = String::new();
566
567 while let Ok(block) = Block::read(&mut input_file) {
569 match block.block_type {
570 BLOCK_DICTIONARY => {
571 self.read_dictionaries(&block.data)?;
573 }
574 BLOCK_HEADER => {
575 let decompressed = decode_all(&block.data[..])
577 .map_err(|e| BTSGError::Compression(e.to_string()))?;
578 output.push_str(&String::from_utf8_lossy(&decompressed));
579 output.push('\n');
580 }
581 BLOCK_GRAPH => {
582 let decompressed = decode_all(&block.data[..])
584 .map_err(|e| BTSGError::Compression(e.to_string()))?;
585
586 let content = String::from_utf8_lossy(&decompressed);
588 let mut lines = content.lines();
589
590 if let Some(first_line) = lines.next() {
592 output.push_str(first_line);
594 output.push('\n');
595
596 for line in lines {
598 output.push_str(line);
599 output.push('\n');
600 }
601 }
602 }
603 _ => {
604 return Err(BTSGError::InvalidBlockType(block.block_type).into());
605 }
606 }
607 }
608
609 Ok(output)
610 }
611}
612
/// Extension trait adding BTSG (binary TSG) serialization to graph types.
pub trait BTSG {
    /// Reads a BTSG file by decompressing it to TSG text in memory and
    /// then parsing that text.
    fn from_btsg<P: AsRef<Path>>(path: P) -> Result<Self>
    where
        Self: Sized;

    /// Writes the graph as a BTSG file using the given zstd level.
    fn to_btsg<P: AsRef<Path>>(&self, path: P, compression_level: i32) -> Result<()>
    where
        Self: Sized;

    /// Reads a BTSG file by streaming its blocks straight into the parser.
    fn from_btsg_direct<P: AsRef<Path>>(path: P) -> Result<Self>
    where
        Self: Sized;
}
626
627impl BTSG for TSGraph {
628 fn from_btsg<P: AsRef<Path>>(path: P) -> Result<Self> {
630 debug!(
631 "Loading TSGraph from BTSG file: {}",
632 path.as_ref().display()
633 );
634
635 let mut decompressor = BTSGDecompressor::new();
637 let tsg_content = decompressor
638 .decompress_to_string(path)
639 .context("Failed to decompress BTSG file")?;
640
641 let cursor = Cursor::new(tsg_content);
643 let mut reader = BufReader::new(cursor);
644 Self::from_reader(&mut reader)
646 }
647
648 fn from_btsg_direct<P: AsRef<Path>>(path: P) -> Result<Self> {
650 let mut input_file = File::open(path.as_ref()).context(format!(
651 "Failed to open BTSG file: {}",
652 path.as_ref().display()
653 ))?;
654
655 let mut magic = [0u8; 4];
657 input_file
658 .read_exact(&mut magic)
659 .context("Failed to read BTSG magic number")?;
660
661 if &magic != b"BTSG" {
662 return Err(anyhow!("Not a valid BTSG file - invalid magic number"));
663 }
664
665 let version = input_file
667 .read_u32::<LittleEndian>()
668 .context("Failed to read BTSG version")?;
669
670 debug!("Reading BTSG file version {}", version);
671 let mut tsg_content = Vec::new();
673
674 loop {
676 let block_type = match input_file.read_u8() {
678 Ok(t) => t,
679 Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => break, Err(e) => return Err(anyhow!("Error reading block type: {}", e)),
681 };
682
683 let block_length = match input_file.read_u32::<LittleEndian>() {
684 Ok(len) => len,
685 Err(e) => return Err(anyhow!("Error reading block length: {}", e)),
686 };
687
688 let mut block_data = vec![0u8; block_length as usize];
690 input_file
691 .read_exact(&mut block_data)
692 .context("Failed to read block data")?;
693
694 match block_type {
696 BLOCK_DICTIONARY => {
697 debug!("Processing dictionary block");
698 }
700 BLOCK_HEADER => {
701 debug!("Processing header block");
702 let decompressed = decode_all(&block_data[..])
704 .map_err(|e| anyhow!("Failed to decompress header block: {}", e))?;
705
706 tsg_content.extend_from_slice(&decompressed);
708 tsg_content.push(b'\n');
709 }
710 BLOCK_GRAPH => {
711 debug!("Processing graph block");
712
713 let decompressed = decode_all(&block_data[..])
715 .map_err(|e| anyhow!("Failed to decompress graph block: {}", e))?;
716
717 let content = String::from_utf8_lossy(&decompressed);
719 let mut lines = content.lines();
720
721 if let Some(first_line) = lines.next() {
723 tsg_content.extend_from_slice(first_line.as_bytes());
725 tsg_content.push(b'\n');
726
727 for line in lines {
729 tsg_content.extend_from_slice(line.as_bytes());
730 tsg_content.push(b'\n');
731 }
732 }
733 }
734 BLOCK_NODE | BLOCK_EDGE | BLOCK_ATTRIBUTE | BLOCK_CHAIN | BLOCK_PATH
735 | BLOCK_LINK => {
736 debug!("Processing block type {}", block_type);
737 let decompressed = decode_all(&block_data[..]).map_err(|e| {
739 anyhow!("Failed to decompress block type {}: {}", block_type, e)
740 })?;
741
742 tsg_content.extend_from_slice(&decompressed);
744 tsg_content.push(b'\n');
745 }
746 _ => {
747 warn!("Unknown block type: {}", block_type);
748 }
749 }
750 }
751
752 let cursor = Cursor::new(tsg_content);
754 let reader = BufReader::new(cursor);
755 Self::from_reader(reader)
756 }
757
758 fn to_btsg<P: AsRef<Path>>(&self, path: P, compression_level: i32) -> Result<()> {
760 let temp_dir = tempfile::tempdir().context("Failed to create temporary directory")?;
762 let temp_tsg_path = temp_dir.path().join("temp.tsg");
763
764 self.to_file(&temp_tsg_path)
766 .context("Failed to write TSGraph to temporary file")?;
767
768 let mut compressor = BTSGCompressor::new(compression_level);
770
771 compressor
773 .compress(&temp_tsg_path, &path.as_ref().to_path_buf())
774 .context("Failed to compress TSG to BTSG")?;
775
776 Ok(())
777 }
778}
779
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use tsg_core::graph::{EdgeData, GraphSection, Header, NodeData, StructuralVariant};

    use super::*;
    use tempfile::NamedTempFile;

    // Interning, lookup, and binary round-trip of StringDictionary.
    #[test]
    fn test_string_dictionary() {
        let mut dict = StringDictionary::new();

        let id1 = dict.add("hello".as_bytes().as_bstr());
        let id2 = dict.add("world".as_bytes().as_bstr());
        // Re-adding an existing string must return its original id.
        let id3 = dict.add("hello".as_bytes().as_bstr()); assert_eq!(id1, 0);
        assert_eq!(id2, 1);
        assert_eq!(id3, 0); assert_eq!(dict.str(id1).unwrap(), "hello".as_bytes().as_bstr());
        assert_eq!(dict.str(id2).unwrap(), "world".as_bytes().as_bstr());

        assert_eq!(dict.id("hello".as_bytes().as_bstr()).unwrap(), id1);
        assert_eq!(dict.id("world".as_bytes().as_bstr()).unwrap(), id2);
        assert_eq!(dict.id("unknown".as_bytes().as_bstr()), None);

        // Serialize and reload; the mapping must survive intact.
        let mut buffer = Vec::new();
        dict.write(&mut buffer).unwrap();

        let mut cursor = io::Cursor::new(buffer);
        let loaded_dict = StringDictionary::read(&mut cursor).unwrap();

        assert_eq!(loaded_dict.str(id1).unwrap(), "hello".as_bytes().as_bstr());
        assert_eq!(loaded_dict.str(id2).unwrap(), "world".as_bytes().as_bstr());
        assert_eq!(loaded_dict.id("hello".as_bytes().as_bstr()).unwrap(), id1);
        assert_eq!(loaded_dict.id("world".as_bytes().as_bstr()).unwrap(), id2);
    }

    // Block tag + payload survive a write/read round trip.
    #[test]
    fn test_block_serialization() {
        let data = b"test data".to_vec();
        let block = Block::new(BLOCK_HEADER, data.clone());

        let mut buffer = Vec::new();
        block.write(&mut buffer).unwrap();

        let mut cursor = io::Cursor::new(buffer);
        let loaded_block = Block::read(&mut cursor).unwrap();

        assert_eq!(loaded_block.block_type, BLOCK_HEADER);
        assert_eq!(loaded_block.data, data);
    }

    // Full compress -> decompress cycle reproduces the TSG lines.
    #[test]
    fn test_compression_round_trip() -> Result<()> {
        let mut temp_tsg = NamedTempFile::new()?;
        temp_tsg.write_all(b"H\tTSG\t1.0\nH\treference\tGRCh38\nG\tg1\nN\tn1\tchr1:+:1000-2000\tread1:SO\nE\te1\tn1\tn2\tchr1,chr1,2000,3000,splice\n")?;

        let temp_btsg = NamedTempFile::new()?;
        let temp_btsg_path = temp_btsg.path().to_path_buf();

        let temp_out = NamedTempFile::new()?;
        let temp_out_path = temp_out.path().to_path_buf();

        let mut compressor = BTSGCompressor::new(3); compressor.compress(temp_tsg.path(), &temp_btsg_path)?;

        let mut decompressor = BTSGDecompressor::new();
        decompressor.decompress(&temp_btsg_path, &temp_out_path)?;

        let original = std::fs::read_to_string(temp_tsg.path())?;
        let roundtrip = std::fs::read_to_string(&temp_out_path)?;

        // Compare line-by-line so trailing-newline differences don't fail.
        let original_lines: Vec<&str> = original.lines().collect();
        let roundtrip_lines: Vec<&str> = roundtrip.lines().collect();

        assert_eq!(original_lines, roundtrip_lines);

        Ok(())
    }

    // Loading a compressed file through the BTSG trait yields the graph.
    #[test]
    fn test_from_btsg() -> Result<()> {
        let mut temp_tsg = NamedTempFile::new()?;
        temp_tsg.write_all(b"H\tTSG\t1.0\nH\treference\tGRCh38\nG\tg1\nN\tn1\tchr1:+:1000-2000\tread1:SO\nE\te1\tn1\tn2\tchr1,chr1,2000,3000,splice\n")?;

        let temp_btsg = NamedTempFile::new()?;
        let temp_btsg_path = temp_btsg.path().to_path_buf();
        let mut compressor = BTSGCompressor::new(3); compressor.compress(temp_tsg.path(), &temp_btsg_path)?;

        let graph = TSGraph::from_btsg(&temp_btsg_path)?;

        // n2 only appears in the edge record; the parser must create it.
        assert_eq!(graph.nodes("g1").len(), 2);
        assert_eq!(graph.edges("g1").len(), 1);

        Ok(())
    }

    // Builds a graph programmatically, writes BTSG, and reloads it.
    #[test]
    fn test_from_btsg_roundtrip2() -> Result<()> {
        let mut graph = TSGraph::new();

        let header1 = Header::builder().tag("TSG").value("1.0").build();
        let header2 = Header::builder().tag("reference").value("GRCh38").build();
        graph.headers.push(header1);
        graph.headers.push(header2);

        let graph_id: BString = "test_graph".into();
        let mut graph_section = GraphSection::new(graph_id.clone());

        let node1 = NodeData::from_str("N\tn1\tchr1:+:1000-2000\tread1:SO")?;
        let node2 = NodeData::from_str("N\tn2\tchr1:+:3000-4000\tread1:IN")?;

        graph_section.add_node(node1)?;
        graph_section.add_node(node2)?;

        let edge_data = EdgeData {
            id: "e1".into(),
            sv: StructuralVariant::from_str("chr1,chr1,2000,3000,splice")?,
            attributes: Default::default(),
        };
        graph_section.add_edge(
            "n1".as_bytes().as_bstr(),
            "n2".as_bytes().as_bstr(),
            edge_data,
        )?;

        graph.graphs.insert(graph_id, graph_section);

        let temp_tsg = NamedTempFile::new()?;
        let temp_tsg_path = temp_tsg.path().to_path_buf();

        let temp_btsg = NamedTempFile::new()?;
        let temp_btsg_path = temp_btsg.path().to_path_buf();

        graph.to_file(&temp_tsg_path)?;

        graph.to_btsg(&temp_btsg_path, 3)?;

        let loaded_graph = TSGraph::from_btsg(&temp_btsg_path)?;

        // NOTE(review): 3 headers while only 2 were pushed -- presumably
        // TSGraph::new() seeds a default header; confirm against tsg_core.
        assert_eq!(loaded_graph.headers.len(), 3); assert!(loaded_graph.headers.iter().any(|h| h.tag == "TSG"));
        assert!(loaded_graph.headers.iter().any(|h| h.tag == "reference"));

        assert_eq!(loaded_graph.graphs.len(), 1);
        assert!(loaded_graph.graphs.contains_key("test_graph".as_bytes()));

        let loaded_section = &loaded_graph.graphs["test_graph".as_bytes()];
        assert_eq!(loaded_section.node_indices.len(), 2);
        assert_eq!(loaded_section.edge_indices.len(), 1);

        Ok(())
    }
}