1pub mod io;
2pub mod map;
3
4mod bed_trait;
5pub use bed_trait::*;
6mod score;
7use bincode::{Decode, Encode};
8pub use score::Score;
9mod strand;
10pub use strand::Strand;
11
12use std::{fmt::{self, Write}, ops::Deref, str::FromStr};
13
14#[cfg(feature = "serde")]
15use serde::{Deserialize, Serialize};
16
/// Column separator used when formatting and parsing records.
const DELIMITER: char = '\t';
/// Placeholder written for, and recognised as, an absent optional column.
const MISSING_ITEM: &str = ".";
19
20#[derive(Encode, Decode, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
22#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
23pub struct GenomicRange(String, u64, u64);
24
25impl GenomicRange {
26 pub fn new<C>(chrom: C, start: u64, end: u64) -> Self
27 where
28 C: Into<String>,
29 { Self(chrom.into(), start, end) }
30
31 pub fn pretty_show(&self) -> String {
33 format!("{}:{}-{}", self.0, self.1, self.2)
34 }
35}
36
37impl FromStr for GenomicRange {
42 type Err = ParseError;
43
44 fn from_str(s: &str) -> Result<Self, Self::Err> {
45 let mut fields = s.split(&['\t', ':', '-']);
46 let chrom = parse_chrom(&mut fields)?;
47 let start = parse_start(&mut fields)?;
48 let end = parse_end(&mut fields)?;
49 Ok(GenomicRange::new(chrom, start, end))
50 }
51}
52
53impl BEDLike for GenomicRange {
54 fn chrom(&self) -> &str { &self.0 }
55 fn set_chrom(&mut self, chrom: &str) -> &mut Self {
56 self.0 = chrom.to_string();
57 self
58 }
59 fn start(&self) -> u64 { self.1 }
60 fn set_start(&mut self, start: u64) -> &mut Self {
61 self.1 = start;
62 self
63 }
64 fn end(&self) -> u64 { self.2 }
65 fn set_end(&mut self, end: u64) -> &mut Self {
66 self.2 = end;
67 self
68 }
69 fn name(&self) -> Option<&str> { None }
70 fn score(&self) -> Option<Score> { None }
71 fn strand(&self) -> Option<Strand> { None }
72}
73
74impl fmt::Display for GenomicRange {
75 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
76 write!(f, "{}{}{}{}{}", self.chrom(), DELIMITER, self.start(),
77 DELIMITER, self.end()
78 )?;
79 Ok(())
80 }
81}
82
83
84#[derive(Encode, Decode, Clone, Debug, Eq, PartialEq)]
86#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
87pub struct BED<const N: u8> {
88 chrom: String,
89 start: u64,
90 end: u64,
91 pub name: Option<String>,
92 pub score: Option<Score>,
93 pub strand: Option<Strand>,
94 pub optional_fields: OptionalFields,
95}
96
97impl<const N: u8> BED<N> {
98 pub fn new<C>(chrom: C, start: u64, end: u64, name: Option<String>,
99 score: Option<Score>, strand: Option<Strand>, optional_fields: OptionalFields) -> Self
100 where
101 C: Into<String>,
102 { Self { chrom: chrom.into(), start, end, name, score, strand, optional_fields } }
103}
104
105impl<const N: u8> BEDLike for BED<N> {
106 fn chrom(&self) -> &str { &self.chrom }
107 fn set_chrom(&mut self, chrom: &str) -> &mut Self {
108 self.chrom = chrom.to_string();
109 self
110 }
111 fn start(&self) -> u64 { self.start }
112 fn set_start(&mut self, start: u64) -> &mut Self {
113 self.start = start;
114 self
115 }
116 fn end(&self) -> u64 { self.end }
117 fn set_end(&mut self, end: u64) -> &mut Self {
118 self.end = end;
119 self
120 }
121 fn name(&self) -> Option<&str> { self.name.as_deref() }
122 fn score(&self) -> Option<Score> { self.score }
123 fn strand(&self) -> Option<Strand> { self.strand }
124}
125
126impl<const N: u8> fmt::Display for BED<N> {
128 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
129 write!(
130 f,
131 "{}{}{}{}{}",
132 self.chrom(),
133 DELIMITER,
134 self.start(),
135 DELIMITER,
136 self.end()
137 )?;
138 if N > 3 {
139 write!(f, "{}{}", DELIMITER, self.name().unwrap_or(MISSING_ITEM))?;
140 if N > 4 {
141 f.write_char(DELIMITER)?;
142 if let Some(score) = self.score() {
143 write!(f, "{}", score)?;
144 } else { f.write_str(MISSING_ITEM)?; }
145
146 if N > 5 {
147 f.write_char(DELIMITER)?;
148 if let Some(strand) = self.strand() {
149 write!(f, "{}", strand)?;
150 } else { f.write_str(MISSING_ITEM)?; }
151 }
152 }
153 }
154 Ok(())
155 }
156}
157
158impl<const N: u8> FromStr for BED<N> {
159 type Err = ParseError;
160
161 fn from_str(s: &str) -> Result<Self, Self::Err> {
162 let mut fields = s.split(DELIMITER);
163 let chrom = parse_chrom(&mut fields)?;
164 let start = parse_start(&mut fields)?;
165 let end = parse_end(&mut fields)?;
166 let name = if N > 3 { parse_name(&mut fields)? } else { None };
167 let score = if N > 4 { parse_score(&mut fields)? } else { None };
168 let strand = if N > 5 { parse_strand(&mut fields)? } else { None };
169 Ok(BED::new(chrom, start, end, name, score, strand, OptionalFields::default()))
170 }
171}
172
173#[derive(Encode, Decode, Clone, Debug, Default, Eq, PartialEq)]
175#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
176pub struct OptionalFields(Vec<String>);
177
178impl Deref for OptionalFields {
179 type Target = [String];
180
181 fn deref(&self) -> &Self::Target {
182 &self.0
183 }
184}
185
186impl fmt::Display for OptionalFields {
187 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
188 for (i, field) in self.0.iter().enumerate() {
189 if i > 0 {
190 f.write_char(DELIMITER)?;
191 }
192
193 f.write_str(field)?;
194 }
195
196 Ok(())
197 }
198}
199
200impl From<Vec<String>> for OptionalFields {
201 fn from(fields: Vec<String>) -> Self {
202 Self(fields)
203 }
204}
205
206
/// A peak record with ten tab-separated columns, in the field order below.
///
/// NOTE(review): the layout looks like the ENCODE narrowPeak format — confirm.
#[derive(Encode, Decode, Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct NarrowPeak {
    /// Chromosome / reference sequence name.
    pub chrom: String,
    /// Start coordinate.
    pub start: u64,
    /// End coordinate.
    pub end: u64,
    /// Optional peak name; `None` is written as `.`.
    pub name: Option<String>,
    /// Optional score; `None` is written as `.`.
    pub score: Option<Score>,
    /// Optional strand; `None` is written as `.`.
    pub strand: Option<Strand>,
    /// Signal value column.
    pub signal_value: f64,
    /// p-value column; a negative input parses to `None`, and `None` is written as `-1`.
    pub p_value: Option<f64>,
    /// q-value column; a negative input parses to `None`, and `None` is written as `-1`.
    pub q_value: Option<f64>,
    /// Peak column (last column of the record).
    /// NOTE(review): presumably the summit offset from `start` — confirm.
    pub peak: u64,
}
222
223impl BEDLike for NarrowPeak {
224 fn chrom(&self) -> &str { &self.chrom }
225 fn set_chrom(&mut self, chrom: &str) -> &mut Self {
226 self.chrom = chrom.to_string();
227 self
228 }
229 fn start(&self) -> u64 { self.start }
230 fn set_start(&mut self, start: u64) -> &mut Self {
231 self.start = start;
232 self
233 }
234 fn end(&self) -> u64 { self.end }
235 fn set_end(&mut self, end: u64) -> &mut Self {
236 self.end = end;
237 self
238 }
239 fn name(&self) -> Option<&str> { self.name.as_deref() }
240 fn score(&self) -> Option<Score> { self.score }
241 fn strand(&self) -> Option<Strand> { self.strand }
242}
243
244impl fmt::Display for NarrowPeak {
245 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
246 write!(
247 f,
248 "{}{}{}{}{}{}{}",
249 self.chrom(),
250 DELIMITER, self.start(),
251 DELIMITER, self.end(),
252 DELIMITER, self.name().unwrap_or(MISSING_ITEM),
253 )?;
254
255 f.write_char(DELIMITER)?;
256 if let Some(x) = self.score() {
257 write!(f, "{}", x)?;
258 } else {
259 f.write_str(MISSING_ITEM)?;
260 }
261 f.write_char(DELIMITER)?;
262 if let Some(x) = self.strand() {
263 write!(f, "{}", x)?;
264 } else {
265 f.write_str(MISSING_ITEM)?;
266 }
267 write!(
268 f,
269 "{}{}{}{}{}{}{}{}",
270 DELIMITER, self.signal_value,
271 DELIMITER, self.p_value.unwrap_or(-1.0),
272 DELIMITER, self.q_value.unwrap_or(-1.0),
273 DELIMITER, self.peak,
274 )?;
275
276 Ok(())
277 }
278}
279
280impl FromStr for NarrowPeak {
281 type Err = ParseError;
282
283 fn from_str(s: &str) -> Result<Self, Self::Err> {
284 let mut fields = s.split(DELIMITER);
285 Ok(Self {
286 chrom: parse_chrom(&mut fields)?.to_string(),
287 start: parse_start(&mut fields)?,
288 end: parse_end(&mut fields)?,
289 name: parse_name(&mut fields)?,
290 score: parse_score(&mut fields)?,
291 strand: parse_strand(&mut fields)?,
292 signal_value: fields.next().unwrap().parse().unwrap(),
293 p_value: parse_pvalue(&mut fields).unwrap(),
294 q_value: parse_pvalue(&mut fields).unwrap(),
295 peak: fields.next().unwrap().parse().unwrap(),
296 })
297 }
298}
299
/// A peak record with nine tab-separated columns, in the field order below.
///
/// NOTE(review): the layout looks like the ENCODE broadPeak format — confirm.
#[derive(Encode, Decode, Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct BroadPeak {
    /// Chromosome / reference sequence name.
    pub chrom: String,
    /// Start coordinate.
    pub start: u64,
    /// End coordinate.
    pub end: u64,
    /// Optional peak name; `None` is written as `.`.
    pub name: Option<String>,
    /// Optional score; `None` is written as `.`.
    pub score: Option<Score>,
    /// Optional strand; `None` is written as `.`.
    pub strand: Option<Strand>,
    /// Signal value column.
    pub signal_value: f64,
    /// p-value column; a negative input parses to `None`, and `None` is written as `-1`.
    pub p_value: Option<f64>,
    /// q-value column; a negative input parses to `None`, and `None` is written as `-1`.
    pub q_value: Option<f64>,
}
314
315impl BEDLike for BroadPeak {
316 fn chrom(&self) -> &str { &self.chrom }
317 fn set_chrom(&mut self, chrom: &str) -> &mut Self {
318 self.chrom = chrom.to_string();
319 self
320 }
321 fn start(&self) -> u64 { self.start }
322 fn set_start(&mut self, start: u64) -> &mut Self {
323 self.start = start;
324 self
325 }
326 fn end(&self) -> u64 { self.end }
327 fn set_end(&mut self, end: u64) -> &mut Self {
328 self.end = end;
329 self
330 }
331 fn name(&self) -> Option<&str> { self.name.as_deref() }
332 fn score(&self) -> Option<Score> { self.score }
333 fn strand(&self) -> Option<Strand> { self.strand }
334}
335
336impl fmt::Display for BroadPeak {
337 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
338 write!(
339 f,
340 "{}{}{}{}{}{}{}",
341 self.chrom(),
342 DELIMITER, self.start(),
343 DELIMITER, self.end(),
344 DELIMITER, self.name().unwrap_or(MISSING_ITEM),
345 )?;
346
347 f.write_char(DELIMITER)?;
348 if let Some(x) = self.score() {
349 write!(f, "{}", x)?;
350 } else {
351 f.write_str(MISSING_ITEM)?;
352 }
353 f.write_char(DELIMITER)?;
354 if let Some(x) = self.strand() {
355 write!(f, "{}", x)?;
356 } else {
357 f.write_str(MISSING_ITEM)?;
358 }
359 write!(
360 f,
361 "{}{}{}{}{}{}",
362 DELIMITER, self.signal_value,
363 DELIMITER, self.p_value.unwrap_or(-1.0),
364 DELIMITER, self.q_value.unwrap_or(-1.0),
365 )?;
366
367 Ok(())
368 }
369}
370
371impl FromStr for BroadPeak {
372 type Err = ParseError;
373
374 fn from_str(s: &str) -> Result<Self, Self::Err> {
375 let mut fields = s.split(DELIMITER);
376 Ok(Self {
377 chrom: parse_chrom(&mut fields)?.to_string(),
378 start: parse_start(&mut fields)?,
379 end: parse_end(&mut fields)?,
380 name: parse_name(&mut fields)?,
381 score: parse_score(&mut fields)?,
382 strand: parse_strand(&mut fields)?,
383 signal_value: fields.next().unwrap().parse().unwrap(),
384 p_value: parse_pvalue(&mut fields).unwrap(),
385 q_value: parse_pvalue(&mut fields).unwrap(),
386 })
387 }
388}
389
390#[derive(Encode, Decode, Clone, Debug, PartialEq)]
393#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
394pub struct BedGraph<V> {
395 pub chrom: String,
396 pub start: u64,
397 pub end: u64,
398 pub value: V,
399}
400
401impl<V> BedGraph<V> {
402 pub fn new<C>(chrom: C, start: u64, end: u64, value: V) -> Self
403 where
404 C: Into<String>,
405 { Self { chrom: chrom.into(), start, end, value } }
406
407 pub fn from_bed<B: BEDLike>(bed: &B, value: V) -> Self {
408 Self::new(bed.chrom(), bed.start(), bed.end(), value)
409 }
410}
411
412impl<V> BEDLike for BedGraph<V> {
413 fn chrom(&self) -> &str { &self.chrom }
414 fn set_chrom(&mut self, chrom: &str) -> &mut Self {
415 self.chrom = chrom.to_string();
416 self
417 }
418 fn start(&self) -> u64 { self.start }
419 fn set_start(&mut self, start: u64) -> &mut Self {
420 self.start = start;
421 self
422 }
423 fn end(&self) -> u64 { self.end }
424 fn set_end(&mut self, end: u64) -> &mut Self {
425 self.end = end;
426 self
427 }
428 fn name(&self) -> Option<&str> { None }
429 fn score(&self) -> Option<Score> { None }
430 fn strand(&self) -> Option<Strand> { None }
431}
432
433impl<V> fmt::Display for BedGraph<V>
434where
435 V: fmt::Display,
436{
437 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
438 {
439 write!(
440 f,
441 "{}{}{}{}{}{}{}",
442 self.chrom(),
443 DELIMITER, self.start(),
444 DELIMITER, self.end(),
445 DELIMITER, self.value,
446 )
447 }
448}
449
450impl<V> FromStr for BedGraph<V>
451where
452 V: FromStr,
453 <V as FromStr>::Err: std::fmt::Debug,
454{
455 type Err = ParseError;
456
457 fn from_str(s: &str) -> Result<Self, Self::Err>
458 {
459 let mut fields = s.split(DELIMITER);
460 Ok(Self {
461 chrom: parse_chrom(&mut fields)?.to_string(),
462 start: parse_start(&mut fields)?,
463 end: parse_end(&mut fields)?,
464 value: fields.next().unwrap().parse().unwrap(),
465 })
466 }
467}
468
469fn parse_chrom<'a, I>(fields: &mut I) -> Result<&'a str, ParseError>
470where
471 I: Iterator<Item = &'a str>,
472{
473 fields
474 .next()
475 .ok_or(ParseError::MissingReferenceSequenceName)
476}
477
478fn parse_start<'a, I>(fields: &mut I) -> Result<u64, ParseError>
479where
480 I: Iterator<Item = &'a str>,
481{
482 fields
483 .next()
484 .ok_or(ParseError::MissingStartPosition)
485 .and_then(|s| lexical::parse(s).map_err(ParseError::InvalidStartPosition))
486}
487
488fn parse_end<'a, I>(fields: &mut I) -> Result<u64, ParseError>
489where
490 I: Iterator<Item = &'a str>,
491{
492 fields
493 .next()
494 .ok_or(ParseError::MissingEndPosition)
495 .and_then(|s| lexical::parse(s).map_err(ParseError::InvalidEndPosition))
496}
497
498fn parse_name<'a, I>(fields: &mut I) -> Result<Option<String>, ParseError>
499where
500 I: Iterator<Item = &'a str>,
501{
502 fields
503 .next()
504 .ok_or(ParseError::MissingName)
505 .map(|s| match s {
506 MISSING_ITEM => None,
507 _ => Some(s.into()),
508 })
509}
510
511fn parse_score<'a, I>(fields: &mut I) -> Result<Option<Score>, ParseError>
512where
513 I: Iterator<Item = &'a str>,
514{
515 fields
516 .next()
517 .ok_or(ParseError::MissingScore)
518 .and_then(|s| match s {
519 MISSING_ITEM => Ok(None),
520 _ => s.parse().map(Some).map_err(ParseError::InvalidScore),
521 })
522}
523
524fn parse_strand<'a, I>(fields: &mut I) -> Result<Option<Strand>, ParseError>
525where
526 I: Iterator<Item = &'a str>,
527{
528 fields
529 .next()
530 .ok_or(ParseError::MissingStrand)
531 .and_then(|s| match s {
532 MISSING_ITEM => Ok(None),
533 _ => s.parse().map(Some).map_err(ParseError::InvalidStrand),
534 })
535}
536
537fn parse_pvalue<'a, I>(fields: &mut I) -> Result<Option<f64>, ParseError>
538where
539 I: Iterator<Item = &'a str>,
540{
541 fields
542 .next()
543 .ok_or(ParseError::MissingScore)
544 .and_then(|s| {
545 let p = s.parse().unwrap();
546 if p < 0.0 { Ok(None) } else { Ok(Some(p)) }
547 })
548}
549
/// Errors produced while parsing a delimited record line.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ParseError {
    /// The chromosome / reference sequence name field is missing.
    MissingReferenceSequenceName,
    /// The start position field is missing.
    MissingStartPosition,
    /// The start position field could not be parsed as an integer.
    InvalidStartPosition(lexical::Error),
    /// The end position field is missing.
    MissingEndPosition,
    /// The end position field could not be parsed as an integer.
    InvalidEndPosition(lexical::Error),
    /// The name field is missing.
    MissingName,
    /// The score field is missing (also returned for a missing p-/q-value field).
    MissingScore,
    /// The score field could not be parsed.
    InvalidScore(score::ParseError),
    /// The strand field is missing.
    MissingStrand,
    /// The strand field could not be parsed.
    InvalidStrand(strand::ParseError),
}
574
#[cfg(test)]
mod bed_tests {
    use super::*;

    /// Checks `OptionalFields` formatting and that `GenomicRange` parses the
    /// same interval from every supported separator combination.
    #[test]
    fn test_fmt() {
        let render = |columns: &[&str]| {
            let owned: Vec<String> = columns.iter().map(|c| String::from(*c)).collect();
            OptionalFields::from(owned).to_string()
        };
        assert_eq!(OptionalFields::default().to_string(), "");
        assert_eq!(render(&["n"]), "n");
        assert_eq!(render(&["n", "d"]), "n\td");

        let genomic_range = GenomicRange::new("chr1", 100, 200);
        let pretty = genomic_range.pretty_show();
        let inputs = [
            "chr1\t100\t200",
            "chr1-100-200",
            "chr1:100-200",
            "chr1:100:200",
            pretty.as_str(),
        ];
        for &input in inputs.iter() {
            assert_eq!(genomic_range, GenomicRange::from_str(input).unwrap());
        }
    }
}