1pub mod io;
2pub mod map;
3
4mod bed_trait;
5pub use bed_trait::*;
6mod score;
7use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize};
8pub use score::Score;
9mod strand;
10pub use strand::Strand;
11
12use std::{
13 fmt::{self, Write},
14 ops::Deref,
15 str::FromStr,
16};
17
18#[cfg(feature = "serde")]
19use serde::{Deserialize, Serialize};
20
21const DELIMITER: char = '\t';
22const MISSING_ITEM: &str = ".";
23
24#[derive(
26 Archive, RkyvSerialize, RkyvDeserialize, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash,
27)]
28#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
29pub struct GenomicRange(String, u64, u64);
30
31impl GenomicRange {
32 pub fn new<C>(chrom: C, start: u64, end: u64) -> Self
33 where
34 C: Into<String>,
35 {
36 Self(chrom.into(), start, end)
37 }
38
39 pub fn pretty_show(&self) -> String {
41 format!("{}:{}-{}", self.0, self.1, self.2)
42 }
43}
44
45impl FromStr for GenomicRange {
50 type Err = ParseError;
51
52 fn from_str(s: &str) -> Result<Self, Self::Err> {
53 let mut fields = s.split(&['\t', ':', '-']);
54 let chrom = parse_chrom(&mut fields)?;
55 let start = parse_start(&mut fields)?;
56 let end = parse_end(&mut fields)?;
57 Ok(GenomicRange::new(chrom, start, end))
58 }
59}
60
61impl BEDLike for GenomicRange {
62 fn chrom(&self) -> &str {
63 &self.0
64 }
65 fn set_chrom(&mut self, chrom: &str) -> &mut Self {
66 self.0 = chrom.to_string();
67 self
68 }
69 fn start(&self) -> u64 {
70 self.1
71 }
72 fn set_start(&mut self, start: u64) -> &mut Self {
73 self.1 = start;
74 self
75 }
76 fn end(&self) -> u64 {
77 self.2
78 }
79 fn set_end(&mut self, end: u64) -> &mut Self {
80 self.2 = end;
81 self
82 }
83 fn name(&self) -> Option<&str> {
84 None
85 }
86 fn score(&self) -> Option<Score> {
87 None
88 }
89 fn strand(&self) -> Option<Strand> {
90 None
91 }
92}
93
94impl fmt::Display for GenomicRange {
95 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
96 write!(
97 f,
98 "{}{}{}{}{}",
99 self.chrom(),
100 DELIMITER,
101 self.start(),
102 DELIMITER,
103 self.end()
104 )?;
105 Ok(())
106 }
107}
108
109#[derive(Archive, RkyvSerialize, RkyvDeserialize, Clone, Debug, Eq, PartialEq)]
111#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
112pub struct BED<const N: u8> {
113 chrom: String,
114 start: u64,
115 end: u64,
116 pub name: Option<String>,
117 pub score: Option<Score>,
118 pub strand: Option<Strand>,
119 pub optional_fields: OptionalFields,
120}
121
122impl<const N: u8> BED<N> {
123 pub fn new<C>(
124 chrom: C,
125 start: u64,
126 end: u64,
127 name: Option<String>,
128 score: Option<Score>,
129 strand: Option<Strand>,
130 optional_fields: OptionalFields,
131 ) -> Self
132 where
133 C: Into<String>,
134 {
135 Self {
136 chrom: chrom.into(),
137 start,
138 end,
139 name,
140 score,
141 strand,
142 optional_fields,
143 }
144 }
145}
146
147impl<const N: u8> BEDLike for BED<N> {
148 fn chrom(&self) -> &str {
149 &self.chrom
150 }
151 fn set_chrom(&mut self, chrom: &str) -> &mut Self {
152 self.chrom = chrom.to_string();
153 self
154 }
155 fn start(&self) -> u64 {
156 self.start
157 }
158 fn set_start(&mut self, start: u64) -> &mut Self {
159 self.start = start;
160 self
161 }
162 fn end(&self) -> u64 {
163 self.end
164 }
165 fn set_end(&mut self, end: u64) -> &mut Self {
166 self.end = end;
167 self
168 }
169 fn name(&self) -> Option<&str> {
170 self.name.as_deref()
171 }
172 fn score(&self) -> Option<Score> {
173 self.score
174 }
175 fn strand(&self) -> Option<Strand> {
176 self.strand
177 }
178}
179
180impl<const N: u8> fmt::Display for BED<N> {
182 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
183 write!(
184 f,
185 "{}{}{}{}{}",
186 self.chrom(),
187 DELIMITER,
188 self.start(),
189 DELIMITER,
190 self.end()
191 )?;
192 if N > 3 {
193 write!(f, "{}{}", DELIMITER, self.name().unwrap_or(MISSING_ITEM))?;
194 if N > 4 {
195 f.write_char(DELIMITER)?;
196 if let Some(score) = self.score() {
197 write!(f, "{}", score)?;
198 } else {
199 f.write_str(MISSING_ITEM)?;
200 }
201
202 if N > 5 {
203 f.write_char(DELIMITER)?;
204 if let Some(strand) = self.strand() {
205 write!(f, "{}", strand)?;
206 } else {
207 f.write_str(MISSING_ITEM)?;
208 }
209 }
210 }
211 }
212 Ok(())
213 }
214}
215
216impl<const N: u8> FromStr for BED<N> {
217 type Err = ParseError;
218
219 fn from_str(s: &str) -> Result<Self, Self::Err> {
220 let mut fields = s.split(DELIMITER);
221 let chrom = parse_chrom(&mut fields)?;
222 let start = parse_start(&mut fields)?;
223 let end = parse_end(&mut fields)?;
224 let name = if N > 3 {
225 parse_name(&mut fields)?
226 } else {
227 None
228 };
229 let score = if N > 4 {
230 parse_score(&mut fields)?
231 } else {
232 None
233 };
234 let strand = if N > 5 {
235 parse_strand(&mut fields)?
236 } else {
237 None
238 };
239 Ok(BED::new(
240 chrom,
241 start,
242 end,
243 name,
244 score,
245 strand,
246 OptionalFields::default(),
247 ))
248 }
249}
250
251#[derive(Archive, RkyvSerialize, RkyvDeserialize, Clone, Debug, Default, Eq, PartialEq)]
253#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
254pub struct OptionalFields(Vec<String>);
255
256impl Deref for OptionalFields {
257 type Target = [String];
258
259 fn deref(&self) -> &Self::Target {
260 &self.0
261 }
262}
263
264impl fmt::Display for OptionalFields {
265 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
266 for (i, field) in self.0.iter().enumerate() {
267 if i > 0 {
268 f.write_char(DELIMITER)?;
269 }
270
271 f.write_str(field)?;
272 }
273
274 Ok(())
275 }
276}
277
278impl From<Vec<String>> for OptionalFields {
279 fn from(fields: Vec<String>) -> Self {
280 Self(fields)
281 }
282}
283
284#[derive(Archive, RkyvSerialize, RkyvDeserialize, Clone, Debug, PartialEq)]
286#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
287pub struct NarrowPeak {
288 pub chrom: String,
289 pub start: u64,
290 pub end: u64,
291 pub name: Option<String>,
292 pub score: Option<Score>,
293 pub strand: Option<Strand>,
294 pub signal_value: f64,
295 pub p_value: Option<f64>,
296 pub q_value: Option<f64>,
297 pub peak: u64,
298}
299
300impl BEDLike for NarrowPeak {
301 fn chrom(&self) -> &str {
302 &self.chrom
303 }
304 fn set_chrom(&mut self, chrom: &str) -> &mut Self {
305 self.chrom = chrom.to_string();
306 self
307 }
308 fn start(&self) -> u64 {
309 self.start
310 }
311 fn set_start(&mut self, start: u64) -> &mut Self {
312 self.start = start;
313 self
314 }
315 fn end(&self) -> u64 {
316 self.end
317 }
318 fn set_end(&mut self, end: u64) -> &mut Self {
319 self.end = end;
320 self
321 }
322 fn name(&self) -> Option<&str> {
323 self.name.as_deref()
324 }
325 fn score(&self) -> Option<Score> {
326 self.score
327 }
328 fn strand(&self) -> Option<Strand> {
329 self.strand
330 }
331}
332
333impl fmt::Display for NarrowPeak {
334 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
335 write!(
336 f,
337 "{}{}{}{}{}{}{}",
338 self.chrom(),
339 DELIMITER,
340 self.start(),
341 DELIMITER,
342 self.end(),
343 DELIMITER,
344 self.name().unwrap_or(MISSING_ITEM),
345 )?;
346
347 f.write_char(DELIMITER)?;
348 if let Some(x) = self.score() {
349 write!(f, "{}", x)?;
350 } else {
351 f.write_str(MISSING_ITEM)?;
352 }
353 f.write_char(DELIMITER)?;
354 if let Some(x) = self.strand() {
355 write!(f, "{}", x)?;
356 } else {
357 f.write_str(MISSING_ITEM)?;
358 }
359 write!(
360 f,
361 "{}{}{}{}{}{}{}{}",
362 DELIMITER,
363 self.signal_value,
364 DELIMITER,
365 self.p_value.unwrap_or(-1.0),
366 DELIMITER,
367 self.q_value.unwrap_or(-1.0),
368 DELIMITER,
369 self.peak,
370 )?;
371
372 Ok(())
373 }
374}
375
376impl FromStr for NarrowPeak {
377 type Err = ParseError;
378
379 fn from_str(s: &str) -> Result<Self, Self::Err> {
380 let mut fields = s.split(DELIMITER);
381 Ok(Self {
382 chrom: parse_chrom(&mut fields)?.to_string(),
383 start: parse_start(&mut fields)?,
384 end: parse_end(&mut fields)?,
385 name: parse_name(&mut fields)?,
386 score: parse_score(&mut fields)?,
387 strand: parse_strand(&mut fields)?,
388 signal_value: fields.next().unwrap().parse().unwrap(),
389 p_value: parse_pvalue(&mut fields).unwrap(),
390 q_value: parse_pvalue(&mut fields).unwrap(),
391 peak: fields.next().unwrap().parse().unwrap(),
392 })
393 }
394}
395
396#[derive(Archive, RkyvSerialize, RkyvDeserialize, Clone, Debug, PartialEq)]
398#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
399pub struct BroadPeak {
400 pub chrom: String,
401 pub start: u64,
402 pub end: u64,
403 pub name: Option<String>,
404 pub score: Option<Score>,
405 pub strand: Option<Strand>,
406 pub signal_value: f64,
407 pub p_value: Option<f64>,
408 pub q_value: Option<f64>,
409}
410
411impl BEDLike for BroadPeak {
412 fn chrom(&self) -> &str {
413 &self.chrom
414 }
415 fn set_chrom(&mut self, chrom: &str) -> &mut Self {
416 self.chrom = chrom.to_string();
417 self
418 }
419 fn start(&self) -> u64 {
420 self.start
421 }
422 fn set_start(&mut self, start: u64) -> &mut Self {
423 self.start = start;
424 self
425 }
426 fn end(&self) -> u64 {
427 self.end
428 }
429 fn set_end(&mut self, end: u64) -> &mut Self {
430 self.end = end;
431 self
432 }
433 fn name(&self) -> Option<&str> {
434 self.name.as_deref()
435 }
436 fn score(&self) -> Option<Score> {
437 self.score
438 }
439 fn strand(&self) -> Option<Strand> {
440 self.strand
441 }
442}
443
444impl fmt::Display for BroadPeak {
445 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
446 write!(
447 f,
448 "{}{}{}{}{}{}{}",
449 self.chrom(),
450 DELIMITER,
451 self.start(),
452 DELIMITER,
453 self.end(),
454 DELIMITER,
455 self.name().unwrap_or(MISSING_ITEM),
456 )?;
457
458 f.write_char(DELIMITER)?;
459 if let Some(x) = self.score() {
460 write!(f, "{}", x)?;
461 } else {
462 f.write_str(MISSING_ITEM)?;
463 }
464 f.write_char(DELIMITER)?;
465 if let Some(x) = self.strand() {
466 write!(f, "{}", x)?;
467 } else {
468 f.write_str(MISSING_ITEM)?;
469 }
470 write!(
471 f,
472 "{}{}{}{}{}{}",
473 DELIMITER,
474 self.signal_value,
475 DELIMITER,
476 self.p_value.unwrap_or(-1.0),
477 DELIMITER,
478 self.q_value.unwrap_or(-1.0),
479 )?;
480
481 Ok(())
482 }
483}
484
485impl FromStr for BroadPeak {
486 type Err = ParseError;
487
488 fn from_str(s: &str) -> Result<Self, Self::Err> {
489 let mut fields = s.split(DELIMITER);
490 Ok(Self {
491 chrom: parse_chrom(&mut fields)?.to_string(),
492 start: parse_start(&mut fields)?,
493 end: parse_end(&mut fields)?,
494 name: parse_name(&mut fields)?,
495 score: parse_score(&mut fields)?,
496 strand: parse_strand(&mut fields)?,
497 signal_value: fields.next().unwrap().parse().unwrap(),
498 p_value: parse_pvalue(&mut fields).unwrap(),
499 q_value: parse_pvalue(&mut fields).unwrap(),
500 })
501 }
502}
503
504#[derive(Archive, RkyvSerialize, RkyvDeserialize, Clone, Debug, PartialEq)]
507#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
508pub struct BedGraph<V> {
509 pub chrom: String,
510 pub start: u64,
511 pub end: u64,
512 pub value: V,
513}
514
515impl<V> BedGraph<V> {
516 pub fn new<C>(chrom: C, start: u64, end: u64, value: V) -> Self
517 where
518 C: Into<String>,
519 {
520 Self {
521 chrom: chrom.into(),
522 start,
523 end,
524 value,
525 }
526 }
527
528 pub fn from_bed<B: BEDLike>(bed: &B, value: V) -> Self {
529 Self::new(bed.chrom(), bed.start(), bed.end(), value)
530 }
531}
532
533impl<V> BEDLike for BedGraph<V> {
534 fn chrom(&self) -> &str {
535 &self.chrom
536 }
537 fn set_chrom(&mut self, chrom: &str) -> &mut Self {
538 self.chrom = chrom.to_string();
539 self
540 }
541 fn start(&self) -> u64 {
542 self.start
543 }
544 fn set_start(&mut self, start: u64) -> &mut Self {
545 self.start = start;
546 self
547 }
548 fn end(&self) -> u64 {
549 self.end
550 }
551 fn set_end(&mut self, end: u64) -> &mut Self {
552 self.end = end;
553 self
554 }
555 fn name(&self) -> Option<&str> {
556 None
557 }
558 fn score(&self) -> Option<Score> {
559 None
560 }
561 fn strand(&self) -> Option<Strand> {
562 None
563 }
564}
565
566impl<V> fmt::Display for BedGraph<V>
567where
568 V: fmt::Display,
569{
570 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
571 write!(
572 f,
573 "{}{}{}{}{}{}{}",
574 self.chrom(),
575 DELIMITER,
576 self.start(),
577 DELIMITER,
578 self.end(),
579 DELIMITER,
580 self.value,
581 )
582 }
583}
584
585impl<V> FromStr for BedGraph<V>
586where
587 V: FromStr,
588 <V as FromStr>::Err: std::fmt::Debug,
589{
590 type Err = ParseError;
591
592 fn from_str(s: &str) -> Result<Self, Self::Err> {
593 let mut fields = s.split(DELIMITER);
594 Ok(Self {
595 chrom: parse_chrom(&mut fields)?.to_string(),
596 start: parse_start(&mut fields)?,
597 end: parse_end(&mut fields)?,
598 value: fields.next().unwrap().parse().unwrap(),
599 })
600 }
601}
602
603fn parse_chrom<'a, I>(fields: &mut I) -> Result<&'a str, ParseError>
604where
605 I: Iterator<Item = &'a str>,
606{
607 fields
608 .next()
609 .ok_or(ParseError::MissingReferenceSequenceName)
610}
611
612fn parse_start<'a, I>(fields: &mut I) -> Result<u64, ParseError>
613where
614 I: Iterator<Item = &'a str>,
615{
616 fields
617 .next()
618 .ok_or(ParseError::MissingStartPosition)
619 .and_then(|s| lexical::parse(s).map_err(ParseError::InvalidStartPosition))
620}
621
622fn parse_end<'a, I>(fields: &mut I) -> Result<u64, ParseError>
623where
624 I: Iterator<Item = &'a str>,
625{
626 fields
627 .next()
628 .ok_or(ParseError::MissingEndPosition)
629 .and_then(|s| lexical::parse(s).map_err(ParseError::InvalidEndPosition))
630}
631
632fn parse_name<'a, I>(fields: &mut I) -> Result<Option<String>, ParseError>
633where
634 I: Iterator<Item = &'a str>,
635{
636 fields
637 .next()
638 .ok_or(ParseError::MissingName)
639 .map(|s| match s {
640 MISSING_ITEM => None,
641 _ => Some(s.into()),
642 })
643}
644
645fn parse_score<'a, I>(fields: &mut I) -> Result<Option<Score>, ParseError>
646where
647 I: Iterator<Item = &'a str>,
648{
649 fields
650 .next()
651 .ok_or(ParseError::MissingScore)
652 .and_then(|s| match s {
653 MISSING_ITEM => Ok(None),
654 _ => s.parse().map(Some).map_err(ParseError::InvalidScore),
655 })
656}
657
658fn parse_strand<'a, I>(fields: &mut I) -> Result<Option<Strand>, ParseError>
659where
660 I: Iterator<Item = &'a str>,
661{
662 fields
663 .next()
664 .ok_or(ParseError::MissingStrand)
665 .and_then(|s| match s {
666 MISSING_ITEM => Ok(None),
667 _ => s.parse().map(Some).map_err(ParseError::InvalidStrand),
668 })
669}
670
671fn parse_pvalue<'a, I>(fields: &mut I) -> Result<Option<f64>, ParseError>
672where
673 I: Iterator<Item = &'a str>,
674{
675 fields.next().ok_or(ParseError::MissingScore).and_then(|s| {
676 let p = s.parse().unwrap();
677 if p < 0.0 {
678 Ok(None)
679 } else {
680 Ok(Some(p))
681 }
682 })
683}
684
685#[derive(Clone, Debug, Eq, PartialEq)]
687pub enum ParseError {
688 MissingReferenceSequenceName,
690 MissingStartPosition,
692 InvalidStartPosition(lexical::Error),
694 MissingEndPosition,
696 InvalidEndPosition(lexical::Error),
698 MissingName,
700 MissingScore,
702 InvalidScore(score::ParseError),
704 MissingStrand,
706 InvalidStrand(strand::ParseError),
708}
709
710#[cfg(test)]
711mod bed_tests {
712 use super::*;
713
714 #[test]
715 fn test_fmt() {
716 let fields = OptionalFields::default();
717 assert_eq!(fields.to_string(), "");
718
719 let fields = OptionalFields::from(vec![String::from("n")]);
720 assert_eq!(fields.to_string(), "n");
721
722 let fields = OptionalFields::from(vec![String::from("n"), String::from("d")]);
723 assert_eq!(fields.to_string(), "n\td");
724
725 let genomic_range = GenomicRange::new("chr1", 100, 200);
726 assert_eq!(
727 genomic_range,
728 GenomicRange::from_str("chr1\t100\t200").unwrap()
729 );
730 assert_eq!(
731 genomic_range,
732 GenomicRange::from_str("chr1-100-200").unwrap()
733 );
734 assert_eq!(
735 genomic_range,
736 GenomicRange::from_str("chr1:100-200").unwrap()
737 );
738 assert_eq!(
739 genomic_range,
740 GenomicRange::from_str("chr1:100:200").unwrap()
741 );
742 assert_eq!(
743 genomic_range,
744 GenomicRange::from_str(&genomic_range.pretty_show()).unwrap()
745 );
746 }
747}