1extern crate flate2;
2
3pub mod error;
4use crate::error::Error::{self, *};
5
6use std::io::{Read, Seek, SeekFrom, Write};
7use std::collections::VecDeque;
8use std::convert::TryInto;
9use flate2::{Decompress, FlushDecompress};
10
11
12static BIGBED_SIG: [u8; 4] = [0x87, 0x89, 0xF2, 0xEB];
13static BPT_SIG: [u8; 4] = [0x78, 0xCA, 0x8C, 0x91];
14static CIRTREE_SIG: [u8; 4] = [0x24, 0x68, 0xAC, 0xE0];
15
16
17pub trait ByteReader: Read {
19 fn read_u64(&mut self, big_endian: bool) -> u64 {
20 let mut bytes: [u8; 8] = [0;8];
21 self.read_exact(&mut bytes).unwrap();
22
23 if big_endian {
24 u64::from_be_bytes(bytes)
25 } else {
26 u64::from_le_bytes(bytes)
27 }
28 }
29
30 fn read_u32(&mut self, big_endian: bool) -> u32 {
31 let mut bytes: [u8; 4] = [0;4];
32 self.read_exact(&mut bytes).unwrap();
33
34 if big_endian {
35 u32::from_be_bytes(bytes)
36 } else {
37 u32::from_le_bytes(bytes)
38 }
39 }
40
41 fn read_u16(&mut self, big_endian: bool) -> u16 {
42 let mut bytes: [u8; 2] = [0;2];
43 self.read_exact(&mut bytes).unwrap();
44 if big_endian {
45 u16::from_be_bytes(bytes)
46 } else {
47 u16::from_le_bytes(bytes)
48 }
49 }
50
51 fn read_u8(&mut self) -> u8 {
52 let mut bytes: [u8; 1] = [0;1];
53 self.read_exact(&mut bytes).unwrap();
54 bytes[0]
55 }
56}
57
58impl<T: Read> ByteReader for T {}
59
60#[derive(Debug, PartialEq)]
61pub struct ZoomLevel {
62 reduction_level: u32,
63 reserved: u32,
64 data_offset: u64,
65 index_offset: u64,
66}
67
68#[derive(Debug, PartialEq)]
69pub struct FileOffsetSize{
70 offset: usize,
71 size: usize,
72}
73
74pub fn find_file_offset_gap(block_list: &[FileOffsetSize]) -> (&[FileOffsetSize], &[FileOffsetSize]) {
75 for (index, block) in block_list.iter().enumerate() {
76 let next = index + 1;
77 if next < block_list.len() && block_list[next].offset != block.offset + block.size {
79 return (&block_list[..next], &block_list[next..])
80 }
81 }
82 (&block_list[..], &[])
83}
84
85fn strip_null(inp: &str) -> &str {
86 let mut start = 0;
87 for (index, byte) in inp.bytes().enumerate() {
88 if start == index && byte == 0 {
89 start += 1
90 } else {
91 break
92 }
93 }
94 for (index, byte) in inp.bytes().enumerate().skip(start) {
95 if byte == 0 {
96 return &inp[start..index]
97 }
98 }
99 &inp[start..]
100}
101
102#[derive(Debug, PartialEq)]
103pub struct Chrom{
104 name: String,
105 id: u32,
106 size: u32,
107}
108
109#[derive(Debug, PartialEq)]
110pub struct BedLine {
111 chrom_id: u32,
112 start: u32,
113 end: u32,
114 rest: Option<String>,
115}
116
117#[derive(Debug)]
118struct BPlusTreeFile {
119 big_endian: bool,
120 block_size: u32,
121 key_size: usize,
122 val_size: usize,
123 item_count: u64,
124 root_offset: u64,
125}
126
127impl BPlusTreeFile {
128 fn with_reader<T: Read + Seek>(reader: &mut T) -> Result<BPlusTreeFile, Error> {
129 let mut buff = [0; 4];
131 reader.read_exact(&mut buff)?;
132 let big_endian =
133 if buff == BPT_SIG {
134 true
135 } else if buff.iter().eq(BPT_SIG.iter().rev()) {
136 false
137 } else {
138 return Err(Error::BadSig{expected: BPT_SIG, received: buff});
139 };
140
141 let block_size = reader.read_u32(big_endian);
143 let key_size = reader.read_u32(big_endian).try_into()?;
144 let val_size = reader.read_u32(big_endian).try_into()?;
145 let item_count = reader.read_u64(big_endian);
146
147 let root_offset = reader.seek(SeekFrom::Current(8))?;
149 Ok(BPlusTreeFile{big_endian, block_size, key_size, val_size, item_count, root_offset})
150 }
151
152 fn chrom_list<T: Read + Seek>(&self, reader: &mut T) -> Result<Vec<Chrom>, Error> {
154 let mut chroms: Vec<Chrom> = Vec::new();
156 let mut offsets = VecDeque::new();
157 offsets.push_back(self.root_offset);
158 while let Some(offset) = offsets.pop_front() {
159 reader.seek(SeekFrom::Start(offset))?;
161
162 let is_leaf = reader.read_u8();
164 let _reserved = reader.read_u8();
165 let child_count = reader.read_u16(self.big_endian);
166 if is_leaf != 0 {
167 let mut valbuf: Vec<u8> = vec![0; self.val_size.try_into().unwrap()];
168 for _ in 0..child_count {
169 let mut keybuf: Vec<u8> = vec![0; self.key_size.try_into().unwrap()];
170 if self.val_size != 8 {
172 panic!("Expected chromosome data to be 8 bytes not, {}", self.val_size)
173 }
174 reader.read_exact(&mut keybuf)?;
175 reader.read_exact(&mut valbuf)?;
176
177 let id = if self.big_endian {
178 u32::from_be_bytes(valbuf[0..4].try_into().unwrap())
179 } else {
180 u32::from_le_bytes(valbuf[0..4].try_into().unwrap())
181 };
182 let size = if self.big_endian {
183 u32::from_be_bytes(valbuf[4..8].try_into().unwrap())
184 } else {
185 u32::from_le_bytes(valbuf[4..8].try_into().unwrap())
186 };
187 let chrom = Chrom{
188 name: String::from_utf8(keybuf).unwrap(), id, size
189 };
190 chroms.push(chrom);
191 }
192 } else {
193 for _ in 0..child_count {
194 reader.seek(SeekFrom::Current(self.key_size.try_into()?))?;
198 let offset = reader.read_u64(self.big_endian);
200 offsets.push_back(offset);
201 }
202 }
203 }
204 Ok(chroms)
205 }
206
207 fn find<T: Read + Seek>(&self, chrom: &str, reader: &mut T) -> Result<Option<Chrom>, Error> {
209 if chrom.len() > self.key_size {
210 return Err(Error::BadKey(chrom.to_owned(), self.key_size))
211 }
212 if chrom.len() != (self.key_size) {
214 let mut padded_key = String::with_capacity(self.key_size);
216 padded_key.push_str(chrom);
217
218 let needed: usize = self.key_size - chrom.len();
219 for _ in 0..needed {
220 padded_key.push('\0');
221 }
222 self._find_internal(&padded_key, reader)
223 } else {
224 self._find_internal(chrom, reader)
225 }
226 }
227
228 fn _find_internal<T: Read + Seek>(&self, chrom: &str, reader: &mut T) -> Result<Option<Chrom>, Error> {
229 let mut offsets = VecDeque::new();
230 offsets.push_back(self.root_offset);
231 while let Some(offset) = offsets.pop_front() {
232 reader.seek(SeekFrom::Start(offset))?;
234
235 let is_leaf = reader.read_u8();
237 let _reserved = reader.read_u8();
238 let child_count = reader.read_u16(self.big_endian);
239 if is_leaf != 0 {
240 let mut valbuf: Vec<u8> = vec![0; self.val_size.try_into().unwrap()];
241 for _ in 0..child_count {
242 let mut keybuf: Vec<u8> = vec![0; self.key_size.try_into().unwrap()];
243 reader.read(&mut keybuf)?;
244 reader.read(&mut valbuf)?;
245 let other_key = String::from_utf8(keybuf).unwrap();
246 if other_key == chrom {
247 if self.val_size != 8 {
248 panic!("Expected chromosome data to be 8 bytes not, {}", self.val_size)
249 }
250 let id = if self.big_endian {
251 u32::from_be_bytes(valbuf[0..4].try_into().unwrap())
252 } else {
253 u32::from_le_bytes(valbuf[0..4].try_into().unwrap())
254 };
255 let size = if self.big_endian {
256 u32::from_be_bytes(valbuf[4..8].try_into().unwrap())
257 } else {
258 u32::from_le_bytes(valbuf[4..8].try_into().unwrap())
259 };
260 return Ok(Some(Chrom{name: other_key, id, size}))
262 }
263 }
264 } else {
265 reader.seek(SeekFrom::Current(self.key_size.try_into()?))?;
267 let mut prev_offset = reader.read_u64(self.big_endian);
269 for _ in 1..child_count {
270 let mut keybuf: Vec<u8> = vec![0; self.key_size];
271 reader.read(&mut keybuf)?;
272 let other_key = String::from_utf8(keybuf).unwrap();
273 if chrom < &other_key {
275 break;
276 }
277 prev_offset = reader.read_u64(self.big_endian);
279 }
280 offsets.push_back(prev_offset);
281 }
282 }
283 Ok(None)
284 }
285}
286
287#[derive(Debug)]
288struct CIRTreeFile {
289 big_endian: bool,
290 block_size: u32,
291 item_count: u64,
292 start_chrom_ix: u32,
293 start_base: u32,
294 end_chrom_ix: u32,
295 end_base: u32,
296 file_size: u64,
297 items_per_slot: u32,
298 root_offset: u64,
299}
300
301fn cir_overlaps(q_chrom: u32, q_start: u32, q_end: u32,
302 start_chrom: u32, start_base: u32,
303 end_chrom: u32, end_base: u32) -> bool {
304 (q_chrom, q_start) < (end_chrom, end_base)
305 && (q_chrom, q_end) > (start_chrom, start_base)
306}
307
308impl CIRTreeFile {
309 fn with_reader<T: Read + Seek>(reader: &mut T) -> Result<CIRTreeFile, Error> {
310 let mut buff = [0; 4];
312 reader.read_exact(&mut buff)?;
313 let big_endian =
314 if buff == CIRTREE_SIG {
315 true
316 } else if buff.iter().eq(CIRTREE_SIG.iter().rev()) {
317 false
318 } else {
319 return Err(Error::BadSig{expected: CIRTREE_SIG, received: buff});
320 };
321
322 let block_size = reader.read_u32(big_endian);
324 let item_count = reader.read_u64(big_endian);
325 let start_chrom_ix = reader.read_u32(big_endian);
326 let start_base = reader.read_u32(big_endian);
327 let end_chrom_ix = reader.read_u32(big_endian);
328 let end_base = reader.read_u32(big_endian);
329 let file_size = reader.read_u64(big_endian);
330 let items_per_slot = reader.read_u32(big_endian);
331
332 let root_offset = reader.seek(SeekFrom::Current(4))?;
334
335 Ok(CIRTreeFile{
336 big_endian,
337 block_size,
338 item_count,
339 start_chrom_ix,
340 start_base,
341 end_chrom_ix,
342 end_base,
343 file_size,
344 items_per_slot,
345 root_offset,
346 })
347 }
348
349 fn find_blocks<T: Read + Seek>(&self, chrom_id: u32, start: u32, end: u32, reader: &mut T) -> Result<Vec<FileOffsetSize>, Error> {
350 let mut blocks = Vec::<FileOffsetSize>::new();
351 let mut offsets = VecDeque::new();
352 offsets.push_back(self.root_offset);
353 while let Some(offset) = offsets.pop_front() {
354 reader.seek(SeekFrom::Start(offset))?;
356
357 let is_leaf = reader.read_u8();
359 let _reserved = reader.read_u8();
360 let child_count = reader.read_u16(self.big_endian);
361
362 if is_leaf != 0 {
363 for _ in 0..child_count {
364 let start_chrom = reader.read_u32(self.big_endian);
365 let start_base = reader.read_u32(self.big_endian);
366 let end_chrom = reader.read_u32(self.big_endian);
367 let end_base = reader.read_u32(self.big_endian);
368 let offset = reader.read_u64(self.big_endian).try_into()?;
369 let size = reader.read_u64(self.big_endian).try_into()?;
370 if cir_overlaps(chrom_id, start, end, start_chrom, start_base, end_chrom, end_base) {
373 blocks.push(FileOffsetSize{offset, size})
374 }
375 }
376 } else {
377 for _ in 0..child_count {
378 let start_chrom = reader.read_u32(self.big_endian);
380 let start_base = reader.read_u32(self.big_endian);
381 let end_chrom = reader.read_u32(self.big_endian);
382 let end_base = reader.read_u32(self.big_endian);
383 let offset = reader.read_u64(self.big_endian);
384
385 if cir_overlaps(chrom_id, start, end, start_chrom, start_base, end_chrom, end_base) {
389 offsets.push_back(offset);
390 }
391 }
392 }
393 }
394 Ok(blocks)
395 }
396}
397
398#[derive(Debug)]
399pub struct BigBed<T: Read + Seek> {
400 reader: T,
401 pub big_endian: bool,
402 pub version: u16,
403 pub zoom_levels: u16,
404 pub chrom_tree_offset: u64,
405 pub unzoomed_data_offset: u64,
406 pub unzoomed_index_offset: u64,
407 pub field_count: u16,
408 pub defined_field_count: u16,
409 pub as_offset: u64,
410 pub total_summary_offset: u64,
411 pub uncompress_buf_size: usize,
412 pub extension_offset: u64,
413 pub level_list: Vec<ZoomLevel>,
414 pub extension_size: Option<u16>,
415 pub extra_index_count: Option<u16>,
416 pub extra_index_list_offset: Option<u64>,
417 chrom_bpt: BPlusTreeFile,
418 unzoomed_cir: Option<CIRTreeFile>,
419}
420
421impl<T: Read + Seek> BigBed<T> {
422 pub fn from_file(mut reader: T) -> Result<BigBed<T>, Error> {
423 let mut buff = [0; 4];
424 reader.read_exact(&mut buff)?;
425 let big_endian =
426 if buff == BIGBED_SIG {
427 true
428 } else if buff.iter().eq(BIGBED_SIG.iter().rev()) {
429 false
430 } else {
431 return Err(Error::BadSig{expected: BIGBED_SIG, received: buff});
432 };
433 let version = reader.read_u16(big_endian);
434 let zoom_levels = reader.read_u16(big_endian);
435 let chrom_tree_offset = reader.read_u64(big_endian);
436 let unzoomed_data_offset = reader.read_u64(big_endian);
437 let unzoomed_index_offset = reader.read_u64(big_endian);
438 let field_count = reader.read_u16(big_endian);
439 let defined_field_count = reader.read_u16(big_endian);
440 let as_offset = reader.read_u64(big_endian);
441 let total_summary_offset = reader.read_u64(big_endian);
442 let uncompress_buf_size = reader.read_u32(big_endian).try_into()?;
443 let extension_offset = reader.read_u64(big_endian);
444
445 let mut level_list: Vec<ZoomLevel> = Vec::with_capacity(usize::from(zoom_levels));
446 for _ in 0..usize::from(zoom_levels) {
447 level_list.push(ZoomLevel{
448 reduction_level: reader.read_u32(big_endian),
449 reserved: reader.read_u32(big_endian),
450 data_offset: reader.read_u64(big_endian),
451 index_offset: reader.read_u64(big_endian)
452 })
453 }
454
455 let mut extension_size = None;
456 let mut extra_index_count = None;
457 let mut extra_index_list_offset = None;
458
459 if extension_offset != 0 {
460 reader.seek(SeekFrom::Start(extension_offset))?;
462 extension_size = Some(reader.read_u16(big_endian));
463 extra_index_count = Some(reader.read_u16(big_endian));
464 extra_index_list_offset = Some(reader.read_u64(big_endian));
465 }
466
467 reader.seek(SeekFrom::Start(chrom_tree_offset))?;
469 let chrom_bpt = BPlusTreeFile::with_reader(&mut reader)?;
470
471 Ok(BigBed{
472 reader, big_endian, version, zoom_levels, chrom_tree_offset,
473 unzoomed_data_offset, unzoomed_index_offset, field_count,
474 defined_field_count, as_offset, total_summary_offset,
475 uncompress_buf_size, extension_offset, level_list,
476 extension_size, extra_index_count, extra_index_list_offset,
477 chrom_bpt, unzoomed_cir: None,
478 })
479 }
480
481 pub fn attach_unzoomed_cir(&mut self) -> Result<(), Error>{
482 if self.unzoomed_cir.is_none() {
483 self.reader.seek(SeekFrom::Start(self.unzoomed_index_offset))?;
485 self.unzoomed_cir = Some(
487 CIRTreeFile::with_reader(&mut self.reader)?
488 );
489 }
490 Ok(())
491 }
492
493 pub fn overlapping_blocks(&mut self, chrom_id: u32,
494 start: u32, end: u32) -> Result<Vec<FileOffsetSize>, Error> {
495
496 self.attach_unzoomed_cir()?;
498 let index = self.unzoomed_cir.as_ref().unwrap();
500 Ok(index.find_blocks(chrom_id, start, end, &mut self.reader)?)
501 }
502
503 pub fn query(&mut self, chrom: &str, start: u32, end: u32, max_items: u32) -> Result<Vec<BedLine>, Error> {
504 let mut lines: Vec<BedLine> = Vec::new();
505 let mut item_count: u32 = 0;
506
507 let chrom_id: Option<u32>;
508 if let Some(chrom_data) = self.find_chrom(chrom)? {
510 chrom_id = Some(chrom_data.id);
511 } else if let Some(chrom_data) = self.find_chrom(&chrom[3..])? {
513 chrom_id = Some(chrom_data.id);
514 } else {
515 return Err(BadChrom(chrom.to_owned()));
516 }
517 let chrom_id = chrom_id.unwrap();
519 let padded_start = if start > 0 {start - 1} else {start};
522 let padded_end = end + 1;
523 let blocks = self.overlapping_blocks(chrom_id, padded_start, padded_end)?;
524
525 let mut decompressor = None;
526 let mut decom_buff = None;
527 if self.uncompress_buf_size > 0 {
528 decompressor = Some(Decompress::new(true));
529 decom_buff = Some(vec![0u8; self.uncompress_buf_size]);
530 }
531
532 let mut remaining = &blocks[..];
533 while remaining.len() > 0 {
534 let split = find_file_offset_gap(remaining);
536 let before_gap = split.0;
537 remaining = split.1;
538
539 let merged_offset = before_gap[0].offset;
541 let merged_size = before_gap.last().unwrap().offset + before_gap.last().unwrap().size - merged_offset;
545 let mut merged_buff: Vec<u8> = vec![0; merged_size as usize];
547 self.reader.seek(SeekFrom::Start(merged_offset.try_into()?))?;
548 self.reader.read_exact(&mut merged_buff)?;
549
550
551 for block in before_gap {
553 let mut index: usize = 0;
554 let block_start = block.offset - merged_offset;
555 let mut block_end = block_start + block.size;
556 let mut buff = &merged_buff[block_start..block_end];
557 if self.uncompress_buf_size > 0 {
558 let debuff = decom_buff.as_mut().unwrap();
559 let decomp = decompressor.as_mut().unwrap();
560 let status = decomp.decompress(&buff, debuff, FlushDecompress::Finish)?;
561 match status {
562 flate2::Status::Ok | flate2::Status::StreamEnd => {}
563 _ => {
564 eprintln!("{:?}", status);
565 return Err(Error::Misc("Decompression error!"));
566 }
567 }
568 block_end = decomp.total_out() as usize;
569 decomp.reset(true);
570 buff = &*debuff;
571 }
572 while index < block_end {
574 let bytes: [u8; 4] = buff[index..index+4].try_into().expect("Failed to convert bytes");
576 let chr = if self.big_endian {u32::from_be_bytes(bytes)} else {u32::from_le_bytes(bytes)};
577 index += 4;
578 let bytes: [u8; 4] = buff[index..index+4].try_into().expect("Failed to convert bytes");
580 let s = if self.big_endian {u32::from_be_bytes(bytes)} else {u32::from_le_bytes(bytes)};
581 index += 4;
582 let bytes: [u8; 4] = buff[index..index+4].try_into().expect("Failed to convert bytes");
584 let e = if self.big_endian {u32::from_be_bytes(bytes)} else {u32::from_le_bytes(bytes)};
585 index += 4;
586
587 let mut rest_length = 0;
590 for (index, byte) in buff[index..block_end].iter().enumerate() {
591 if byte == &0 {
592 rest_length = index;
593 break;
594 }
595 }
596 if chr == chrom_id && ( (s < end && e > start) || (s == e && (s == end || end == start) )) {
598 item_count += 1;
599 if max_items > 0 && item_count > max_items {
600 break;
601 }
602 let rest = if rest_length > 0 {
604 Some(String::from_utf8(buff[index..rest_length+index].to_vec()).expect("FUCK"))
605 } else {
606 None
607 };
608 lines.push(BedLine{
610 chrom_id: chr,
611 start: s,
612 end: e,
613 rest
614 });
615 }
616 index += rest_length + 1;
618 }
619 if max_items > 0 && item_count > max_items {
621 break;
622 }
623 }
624 if max_items > 0 && item_count > max_items {
625 break;
626 }
627 }
628 Ok(lines)
629 }
630
631 pub fn write_bed(&mut self, chrom: Option<&str>, start: Option<u32>, end: Option<u32>, max_items: Option<u32>, mut output: impl Write) -> Result<(), Error> {
632 let item_count = 0;
633 for chrom_data in self.chrom_list()? {
634 if let Some(name) = chrom {
636 if name != strip_null(&chrom_data.name) {
637 continue
638 }
639 }
640 let start = match start {
641 None => 0,
642 Some(value) => value,
643 };
644 let end = match end {
645 None => chrom_data.size,
646 Some(value) => value,
647 };
648 let mut items_left = 0;
650 if let Some(max_value) = max_items {
651 items_left = max_value - item_count;
652 if items_left <= 0 {
654 break;
655 }
656 }
657
658 let name_to_print = strip_null(&chrom_data.name);
659 let interval_list = self.query(&chrom_data.name, start, end, items_left)?;
660 for bed_line in interval_list.into_iter() {
661 match bed_line.rest {
662 None => {
663 output.write(format!("{}\t{}\t{}\n", name_to_print, bed_line.start, bed_line.end).as_bytes())?;
664 } Some(data) => {
665 output.write(format!("{}\t{}\t{}\t{}\n", name_to_print, bed_line.start, bed_line.end, data).as_bytes())?;
666 }
667 }
668 }
669 }
670 Ok(())
671 }
672
673
674 pub fn to_string(&mut self, chrom: Option<&str>, start: Option<u32>, end: Option<u32>, max_items: Option<u32>) -> Result<Vec<String>, Error> {
675 let mut output: Vec<String> = Vec::new();
677 let item_count = 0;
678 for chrom_data in self.chrom_list()? {
679 if let Some(name) = chrom {
681 if name != strip_null(&chrom_data.name) {
682 continue
683 }
684 }
685 let start = match start {
686 None => 0,
687 Some(value) => value,
688 };
689 let end = match end {
690 None => chrom_data.size,
691 Some(value) => value,
692 };
693 let mut items_left = 0;
695 if let Some(max_value) = max_items {
696 items_left = max_value - item_count;
697 if items_left <= 0 {
699 break;
700 }
701 }
702
703 let name_to_print = strip_null(&chrom_data.name);
704 let interval_list = self.query(&chrom_data.name, start, end, items_left)?;
705 for bed_line in interval_list.into_iter() {
706 match bed_line.rest {
707 None => {
708 output.push(format!("{}\t{}\t{}\n", name_to_print, bed_line.start, bed_line.end));
709 } Some(data) => {
710 output.push(format!("{}\t{}\t{}\t{}\n", name_to_print, bed_line.start, bed_line.end, data));
711 }
712 }
713 }
714 }
715 Ok(output)
716 }
717
718 pub fn chrom_list(&mut self) -> Result<Vec<Chrom>, Error> {
719 self.chrom_bpt.chrom_list(&mut self.reader)
720 }
721
722 pub fn find_chrom(&mut self, chrom: &str) -> Result<Option<Chrom>, Error> {
723 self.chrom_bpt.find(chrom, &mut self.reader)
724 }
725}
726
727#[cfg(test)]
728mod test_bb {
729 use std::fs::File;
730 use super::*;
731
732 fn bb_from_file(filename: &str) -> Result<BigBed<File>, Error> {
734 BigBed::from_file(File::open(filename)?)
735 }
736
737 #[test]
739 fn from_file_not_bigbed() {
740 let result = bb_from_file("test/beds/empty.bed").unwrap_err();
742 if let Error::IOError(_) = result {
743 } else {
745 panic!("Expected IOError, received {:?}", result)
746 }
747 let result = bb_from_file("test/beds/one.bed").unwrap_err();
748 assert_eq!(result, Error::BadSig{expected: BIGBED_SIG, received: [99, 104, 114, 55]});
749 let result = bb_from_file("test/notbed.png").unwrap_err();
750 assert_eq!(result, Error::BadSig{expected: BIGBED_SIG, received: [137, 80, 78, 71]});
751 }
752
753 #[test]
755 fn from_file_onebed() {
756 let bb = bb_from_file("test/bigbeds/one.bb").unwrap();
757 assert_eq!(bb.as_offset, 304);
758 assert_eq!(bb.chrom_tree_offset, 628);
759 assert_eq!(bb.defined_field_count, 3);
760 assert_eq!(bb.extension_offset, 564);
761 assert_eq!(bb.extension_size, Some(64));
762 assert_eq!(bb.extra_index_count, Some(0));
763 assert_eq!(bb.extra_index_list_offset, Some(0));
764 assert_eq!(bb.field_count, 3);
765 assert_eq!(bb.big_endian, false);
766 assert_eq!(bb.total_summary_offset, 524);
767 assert_eq!(bb.uncompress_buf_size, 16384);
768 assert!(bb.unzoomed_cir.is_none());
769 assert_eq!(bb.unzoomed_data_offset, 676);
770 assert_eq!(bb.unzoomed_index_offset, 700);
771 assert_eq!(bb.version, 4);
772 assert_eq!(bb.zoom_levels, 1);
773 assert_eq!(bb.level_list, vec![
774 ZoomLevel{reduction_level: 107485656, reserved: 0, data_offset: 6904, index_offset: 6936}
775 ])
776 }
777
778 #[test]
779 fn from_file_longbed() {
780 let bb = bb_from_file("test/bigbeds/long.bb").unwrap();
781 assert_eq!(bb.as_offset, 304);
782 assert_eq!(bb.chrom_tree_offset, 628);
783 assert_eq!(bb.defined_field_count, 3);
784 assert_eq!(bb.extension_offset, 564);
785 assert_eq!(bb.extension_size, Some(64));
786 assert_eq!(bb.extra_index_count, Some(0));
787 assert_eq!(bb.extra_index_list_offset, Some(0));
788 assert_eq!(bb.field_count, 3);
789 assert_eq!(bb.big_endian, false);
790 assert_eq!(bb.total_summary_offset, 524);
791 assert_eq!(bb.uncompress_buf_size, 16384);
792 assert!(bb.unzoomed_cir.is_none());
793 assert_eq!(bb.unzoomed_data_offset, 976);
794 assert_eq!(bb.unzoomed_index_offset, 80369);
795 assert_eq!(bb.version, 4);
796 assert_eq!(bb.zoom_levels, 5);
797 assert_eq!(bb.level_list, vec![
798 ZoomLevel{reduction_level: 2440976, reserved: 0, data_offset: 86757, index_offset: 106847},
799 ZoomLevel{reduction_level: 9763904, reserved: 0, data_offset: 113067, index_offset: 119611},
800 ZoomLevel{reduction_level: 39055616, reserved: 0, data_offset: 125815, index_offset: 127568},
801 ZoomLevel{reduction_level: 156222464, reserved: 0, data_offset: 133772, index_offset: 134387},
802 ZoomLevel{reduction_level: 624889856, reserved: 0, data_offset: 140591, index_offset: 141086}
803 ]);
804 }
805
806 #[test]
807 fn test_chrom_list() {
808 let mut bb = bb_from_file("test/bigbeds/one.bb").unwrap();
809 assert_eq!(bb.chrom_list().unwrap(), vec![Chrom{name: String::from("chr7"), id: 0, size: 159345973}]);
811 assert_eq!(bb.chrom_list().unwrap(), vec![Chrom{name: String::from("chr7"), id: 0, size: 159345973}]);
813 let mut bb = bb_from_file("test/bigbeds/long.bb").unwrap();
815 assert_eq!(bb.chrom_list().unwrap(), vec![
816 Chrom{name: String::from("chr1\0"), id: 0, size: 248956422},
817 Chrom{name: String::from("chr10"), id: 1, size: 133797422},
818 Chrom{name: String::from("chr11"), id: 2, size: 135086622},
819 Chrom{name: String::from("chr12"), id: 3, size: 133275309},
820 Chrom{name: String::from("chr13"), id: 4, size: 114364328},
821 Chrom{name: String::from("chr14"), id: 5, size: 107043718},
822 Chrom{name: String::from("chr15"), id: 6, size: 101991189},
823 Chrom{name: String::from("chr16"), id: 7, size: 90338345},
824 Chrom{name: String::from("chr17"), id: 8, size: 83257441},
825 Chrom{name: String::from("chr18"), id: 9, size: 80373285},
826 Chrom{name: String::from("chr19"), id: 10, size: 58617616},
827 Chrom{name: String::from("chr2\0"), id: 11, size: 242193529},
828 Chrom{name: String::from("chr20"), id: 12, size: 64444167},
829 Chrom{name: String::from("chr21"), id: 13, size: 46709983},
830 Chrom{name: String::from("chr22"), id: 14, size: 50818468},
831 Chrom{name: String::from("chr3\0"), id: 15, size: 198295559},
832 Chrom{name: String::from("chr4\0"), id: 16, size: 190214555},
833 Chrom{name: String::from("chr5\0"), id: 17, size: 181538259},
834 Chrom{name: String::from("chr6\0"), id: 18, size: 170805979},
835 Chrom{name: String::from("chr7\0"), id: 19, size: 159345973},
836 Chrom{name: String::from("chr8\0"), id: 20, size: 145138636},
837 Chrom{name: String::from("chr9\0"), id: 21, size: 138394717},
838 Chrom{name: String::from("chrX\0"), id: 22, size: 156040895},
839 Chrom{name: String::from("chrY\0"), id: 23, size: 57227415}
840 ]);
841 let mut bb = bb_from_file("test/bigbeds/tair10-nochr.bb").unwrap();
842 assert_eq!(bb.chrom_list().unwrap(), vec![
843 Chrom{name: String::from("1"), id: 0, size: 30427671},
844 Chrom{name: String::from("2"), id: 1, size: 19698289},
845 Chrom{name: String::from("3"), id: 2, size: 23459830},
846 Chrom{name: String::from("4"), id: 3, size: 18585056},
847 Chrom{name: String::from("5"), id: 4, size: 26975502},
848 Chrom{name: String::from("C"), id: 5, size: 154478},
849 Chrom{name: String::from("M"), id: 6, size: 366924}
850 ]);
851 let mut bb = bb_from_file("test/bigbeds/tair10.bb").unwrap();
852 assert_eq!(bb.chrom_list().unwrap(), vec![
853 Chrom{name: String::from("Chr1"), id: 0, size: 30427671},
854 Chrom{name: String::from("Chr2"), id: 1, size: 19698289},
855 Chrom{name: String::from("Chr3"), id: 2, size: 23459830},
856 Chrom{name: String::from("Chr4"), id: 3, size: 18585056},
857 Chrom{name: String::from("Chr5"), id: 4, size: 26975502},
858 Chrom{name: String::from("ChrC"), id: 5, size: 154478},
859 Chrom{name: String::from("ChrM"), id: 6, size: 366924}
860 ]);
861 let mut bb = bb_from_file("test/bigbeds/mm10.bb").unwrap();
863 assert_eq!(bb.chrom_list().unwrap(), vec![
864 Chrom{name: String::from("chr1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"), id: 0, size: 195471971},
865 Chrom{name: String::from("chr10\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"), id: 1, size: 130694993},
866 Chrom{name: String::from("chr11\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"), id: 2, size: 122082543},
867 Chrom{name: String::from("chr12\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"), id: 3, size: 120129022},
868 Chrom{name: String::from("chr13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"), id: 4, size: 120421639},
869 Chrom{name: String::from("chr14\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"), id: 5, size: 124902244},
870 Chrom{name: String::from("chr15\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"), id: 6, size: 104043685},
871 Chrom{name: String::from("chr16\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"), id: 7, size: 98207768},
872 Chrom{name: String::from("chr17\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"), id: 8, size: 94987271},
873 Chrom{name: String::from("chr18\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"), id: 9, size: 90702639},
874 Chrom{name: String::from("chr19\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"), id: 10, size: 61431566},
875 Chrom{name: String::from("chr1_GL456210_random"), id: 11, size: 169725},
876 Chrom{name: String::from("chr1_GL456211_random"), id: 12, size: 241735},
877 Chrom{name: String::from("chr1_GL456212_random"), id: 13, size: 153618},
878 Chrom{name: String::from("chr1_GL456213_random"), id: 14, size: 39340},
879 Chrom{name: String::from("chr1_GL456221_random"), id: 15, size: 206961},
880 Chrom{name: String::from("chr2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"), id: 16, size: 182113224},
881 Chrom{name: String::from("chr3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"), id: 17, size: 160039680},
882 Chrom{name: String::from("chr4\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"), id: 18, size: 156508116},
883 Chrom{name: String::from("chr4_GL456216_random"), id: 19, size: 66673},
884 Chrom{name: String::from("chr4_GL456350_random"), id: 20, size: 227966},
885 Chrom{name: String::from("chr4_JH584292_random"), id: 21, size: 14945},
886 Chrom{name: String::from("chr4_JH584293_random"), id: 22, size: 207968},
887 Chrom{name: String::from("chr4_JH584294_random"), id: 23, size: 191905},
888 Chrom{name: String::from("chr4_JH584295_random"), id: 24, size: 1976},
889 Chrom{name: String::from("chr5\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"), id: 25, size: 151834684},
890 Chrom{name: String::from("chr5_GL456354_random"), id: 26, size: 195993},
891 Chrom{name: String::from("chr5_JH584296_random"), id: 27, size: 199368},
892 Chrom{name: String::from("chr5_JH584297_random"), id: 28, size: 205776},
893 Chrom{name: String::from("chr5_JH584298_random"), id: 29, size: 184189},
894 Chrom{name: String::from("chr5_JH584299_random"), id: 30, size: 953012},
895 Chrom{name: String::from("chr6\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"), id: 31, size: 149736546},
896 Chrom{name: String::from("chr7\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"), id: 32, size: 145441459},
897 Chrom{name: String::from("chr7_GL456219_random"), id: 33, size: 175968},
898 Chrom{name: String::from("chr8\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"), id: 34, size: 129401213},
899 Chrom{name: String::from("chr9\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"), id: 35, size: 124595110},
900 Chrom{name: String::from("chrM\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"), id: 36, size: 16299},
901 Chrom{name: String::from("chrUn_GL456239\0\0\0\0\0\0"), id: 37, size: 40056},
902 Chrom{name: String::from("chrUn_GL456359\0\0\0\0\0\0"), id: 38, size: 22974},
903 Chrom{name: String::from("chrUn_GL456360\0\0\0\0\0\0"), id: 39, size: 31704},
904 Chrom{name: String::from("chrUn_GL456366\0\0\0\0\0\0"), id: 40, size: 47073},
905 Chrom{name: String::from("chrUn_GL456367\0\0\0\0\0\0"), id: 41, size: 42057},
906 Chrom{name: String::from("chrUn_GL456368\0\0\0\0\0\0"), id: 42, size: 20208},
907 Chrom{name: String::from("chrUn_GL456370\0\0\0\0\0\0"), id: 43, size: 26764},
908 Chrom{name: String::from("chrUn_GL456372\0\0\0\0\0\0"), id: 44, size: 28664},
909 Chrom{name: String::from("chrUn_GL456378\0\0\0\0\0\0"), id: 45, size: 31602},
910 Chrom{name: String::from("chrUn_GL456379\0\0\0\0\0\0"), id: 46, size: 72385},
911 Chrom{name: String::from("chrUn_GL456381\0\0\0\0\0\0"), id: 47, size: 25871},
912 Chrom{name: String::from("chrUn_GL456382\0\0\0\0\0\0"), id: 48, size: 23158},
913 Chrom{name: String::from("chrUn_GL456383\0\0\0\0\0\0"), id: 49, size: 38659},
914 Chrom{name: String::from("chrUn_GL456385\0\0\0\0\0\0"), id: 50, size: 35240},
915 Chrom{name: String::from("chrUn_GL456387\0\0\0\0\0\0"), id: 51, size: 24685},
916 Chrom{name: String::from("chrUn_GL456389\0\0\0\0\0\0"), id: 52, size: 28772},
917 Chrom{name: String::from("chrUn_GL456390\0\0\0\0\0\0"), id: 53, size: 24668},
918 Chrom{name: String::from("chrUn_GL456392\0\0\0\0\0\0"), id: 54, size: 23629},
919 Chrom{name: String::from("chrUn_GL456393\0\0\0\0\0\0"), id: 55, size: 55711},
920 Chrom{name: String::from("chrUn_GL456394\0\0\0\0\0\0"), id: 56, size: 24323},
921 Chrom{name: String::from("chrUn_GL456396\0\0\0\0\0\0"), id: 57, size: 21240},
922 Chrom{name: String::from("chrUn_JH584304\0\0\0\0\0\0"), id: 58, size: 114452},
923 Chrom{name: String::from("chrX\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"), id: 59, size: 171031299},
924 Chrom{name: String::from("chrX_GL456233_random"), id: 60, size: 336933},
925 Chrom{name: String::from("chrY\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"), id: 61, size: 91744698},
926 Chrom{name: String::from("chrY_JH584300_random"), id: 62, size: 182347},
927 Chrom{name: String::from("chrY_JH584301_random"), id: 63, size: 259875},
928 Chrom{name: String::from("chrY_JH584302_random"), id: 64, size: 155838},
929 Chrom{name: String::from("chrY_JH584303_random"), id: 65, size: 158099}
930 ]);
931 }
932
933 #[test]
934 fn test_find_chrom_one() {
935 let mut bb = bb_from_file("test/bigbeds/one.bb").unwrap();
936 assert_eq!(bb.find_chrom("chr1").unwrap(), None);
937 assert_eq!(bb.find_chrom("chr7").unwrap(), Some(Chrom{name: String::from("chr7"), id: 0, size: 159345973}));
938 assert_eq!(bb.find_chrom("chr7").unwrap(), Some(Chrom{name: String::from("chr7"), id: 0, size: 159345973}));
940 assert_eq!(bb.find_chrom("chr").unwrap(), None);
941 assert_eq!(bb.find_chrom("chr79"), Err(Error::BadKey(String::from("chr79"), 4)));
943 assert_eq!(bb.find_chrom("cHr7").unwrap(), None);
945 assert_eq!(bb.find_chrom("xhr7").unwrap(), None);
947 }
948
949 #[test]
950 fn test_find_chrom_long() {
951 let mut bb = bb_from_file("test/bigbeds/long.bb").unwrap();
952 assert_eq!(bb.find_chrom("chr2\0").unwrap(), Some(Chrom{name: String::from("chr2\0"), id: 11, size: 242193529}));
953 assert_eq!(bb.find_chrom("chr2").unwrap(), Some(Chrom{name: String::from("chr2\0"), id: 11, size: 242193529}));
955 assert_eq!(bb.find_chrom("2").unwrap(), None);
957 assert_eq!(bb.find_chrom("chr2xx"), Err(Error::BadKey(String::from("chr2xx"), 5)));
959 }
960
961 #[test]
962 fn test_overlapping_blocks() {
963 let mut bb = bb_from_file("test/bigbeds/long.bb").unwrap();
964 assert_eq!(bb.overlapping_blocks(0, 100, 1000000), Ok(vec![FileOffsetSize{offset: 984, size: 3324}]));
965 assert_eq!(bb.overlapping_blocks(0, 100000, 10), Ok(vec![]));
967 assert_eq!(bb.overlapping_blocks(20, 131366255, 132257727), Ok(vec![FileOffsetSize{offset: 67045, size: 3295}]));
969 assert_eq!(bb.overlapping_blocks(42, 100000, 10), Ok(vec![]));
971 }
972}