1pub mod io;
2pub mod map;
3
4mod bed_trait;
5pub use bed_trait::*;
6mod score;
7use bincode::{Decode, Encode};
8pub use score::Score;
9mod strand;
10pub use strand::Strand;
11
12use std::{fmt::{self, Write}, ops::Deref, str::FromStr};
13
/// Column separator used by the record parsers and `Display` impls in this module.
const DELIMITER: char = '\t';
/// Placeholder text that stands for an absent name, score or strand column.
const MISSING_ITEM : &str = ".";
16
/// A genomic interval stored as the tuple `(chromosome name, start, end)`.
///
/// Because `Ord`/`PartialOrd` are derived on the tuple, values compare by
/// chromosome name first, then by start, then by end.
#[derive(Encode, Decode, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct GenomicRange(String, u64, u64);
20
21impl GenomicRange {
22 pub fn new<C>(chrom: C, start: u64, end: u64) -> Self
23 where
24 C: Into<String>,
25 { Self(chrom.into(), start, end) }
26
27 pub fn pretty_show(&self) -> String {
29 format!("{}:{}-{}", self.0, self.1, self.2)
30 }
31}
32
33impl FromStr for GenomicRange {
38 type Err = ParseError;
39
40 fn from_str(s: &str) -> Result<Self, Self::Err> {
41 let mut fields = s.split(&['\t', ':', '-']);
42 let chrom = parse_chrom(&mut fields)?;
43 let start = parse_start(&mut fields)?;
44 let end = parse_end(&mut fields)?;
45 Ok(GenomicRange::new(chrom, start, end))
46 }
47}
48
49impl BEDLike for GenomicRange {
50 fn chrom(&self) -> &str { &self.0 }
51 fn set_chrom(&mut self, chrom: &str) -> &mut Self {
52 self.0 = chrom.to_string();
53 self
54 }
55 fn start(&self) -> u64 { self.1 }
56 fn set_start(&mut self, start: u64) -> &mut Self {
57 self.1 = start;
58 self
59 }
60 fn end(&self) -> u64 { self.2 }
61 fn set_end(&mut self, end: u64) -> &mut Self {
62 self.2 = end;
63 self
64 }
65 fn name(&self) -> Option<&str> { None }
66 fn score(&self) -> Option<Score> { None }
67 fn strand(&self) -> Option<Strand> { None }
68}
69
70impl fmt::Display for GenomicRange {
71 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
72 write!(f, "{}{}{}{}{}", self.chrom(), DELIMITER, self.start(),
73 DELIMITER, self.end()
74 )?;
75 Ok(())
76 }
77}
78
79
/// A BED record. The const parameter `N` is the number of leading columns
/// that the `FromStr` and `Display` impls read and write (name when `N > 3`,
/// score when `N > 4`, strand when `N > 5`).
#[derive(Encode, Decode, Clone, Debug, Eq, PartialEq)]
pub struct BED<const N: u8> {
    /// Chromosome name.
    chrom: String,
    /// Start coordinate.
    start: u64,
    /// End coordinate.
    end: u64,
    /// Record name, `None` when absent.
    pub name: Option<String>,
    /// Record score, `None` when absent.
    pub score: Option<Score>,
    /// Strand, `None` when absent.
    pub strand: Option<Strand>,
    /// Additional columns supplied by the caller of `new`.
    pub optional_fields: OptionalFields,
}
91
92impl<const N: u8> BED<N> {
93 pub fn new<C>(chrom: C, start: u64, end: u64, name: Option<String>,
94 score: Option<Score>, strand: Option<Strand>, optional_fields: OptionalFields) -> Self
95 where
96 C: Into<String>,
97 { Self { chrom: chrom.into(), start, end, name, score, strand, optional_fields } }
98}
99
100impl<const N: u8> BEDLike for BED<N> {
101 fn chrom(&self) -> &str { &self.chrom }
102 fn set_chrom(&mut self, chrom: &str) -> &mut Self {
103 self.chrom = chrom.to_string();
104 self
105 }
106 fn start(&self) -> u64 { self.start }
107 fn set_start(&mut self, start: u64) -> &mut Self {
108 self.start = start;
109 self
110 }
111 fn end(&self) -> u64 { self.end }
112 fn set_end(&mut self, end: u64) -> &mut Self {
113 self.end = end;
114 self
115 }
116 fn name(&self) -> Option<&str> { self.name.as_deref() }
117 fn score(&self) -> Option<Score> { self.score }
118 fn strand(&self) -> Option<Strand> { self.strand }
119}
120
121impl<const N: u8> fmt::Display for BED<N> {
123 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
124 write!(
125 f,
126 "{}{}{}{}{}",
127 self.chrom(),
128 DELIMITER,
129 self.start(),
130 DELIMITER,
131 self.end()
132 )?;
133 if N > 3 {
134 write!(f, "{}{}", DELIMITER, self.name().unwrap_or(MISSING_ITEM))?;
135 if N > 4 {
136 f.write_char(DELIMITER)?;
137 if let Some(score) = self.score() {
138 write!(f, "{}", score)?;
139 } else { f.write_str(MISSING_ITEM)?; }
140
141 if N > 5 {
142 f.write_char(DELIMITER)?;
143 if let Some(strand) = self.strand() {
144 write!(f, "{}", strand)?;
145 } else { f.write_str(MISSING_ITEM)?; }
146 }
147 }
148 }
149 Ok(())
150 }
151}
152
153impl<const N: u8> FromStr for BED<N> {
154 type Err = ParseError;
155
156 fn from_str(s: &str) -> Result<Self, Self::Err> {
157 let mut fields = s.split(DELIMITER);
158 let chrom = parse_chrom(&mut fields)?;
159 let start = parse_start(&mut fields)?;
160 let end = parse_end(&mut fields)?;
161 let name = if N > 3 { parse_name(&mut fields)? } else { None };
162 let score = if N > 4 { parse_score(&mut fields)? } else { None };
163 let strand = if N > 5 { parse_strand(&mut fields)? } else { None };
164 Ok(BED::new(chrom, start, end, name, score, strand, OptionalFields::default()))
165 }
166}
167
/// An ordered list of extra string columns attached to a record.
#[derive(Encode, Decode, Clone, Debug, Default, Eq, PartialEq)]
pub struct OptionalFields(Vec<String>);
171
172impl Deref for OptionalFields {
173 type Target = [String];
174
175 fn deref(&self) -> &Self::Target {
176 &self.0
177 }
178}
179
180impl fmt::Display for OptionalFields {
181 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
182 for (i, field) in self.0.iter().enumerate() {
183 if i > 0 {
184 f.write_char(DELIMITER)?;
185 }
186
187 f.write_str(field)?;
188 }
189
190 Ok(())
191 }
192}
193
194impl From<Vec<String>> for OptionalFields {
195 fn from(fields: Vec<String>) -> Self {
196 Self(fields)
197 }
198}
199
200
/// A ten-column peak record: the six leading BED columns followed by
/// signal value, p-value, q-value and peak.
#[derive(Encode, Decode, Clone, Debug, PartialEq)]
pub struct NarrowPeak {
    /// Chromosome name.
    pub chrom: String,
    /// Start coordinate.
    pub start: u64,
    /// End coordinate.
    pub end: u64,
    /// Record name, `None` when absent.
    pub name: Option<String>,
    /// Record score, `None` when absent.
    pub score: Option<Score>,
    /// Strand, `None` when absent.
    pub strand: Option<Strand>,
    /// Signal value column.
    pub signal_value: f64,
    /// P-value column; `None` is written as `-1` and a negative input is read as `None`.
    pub p_value: Option<f64>,
    /// Q-value column; `None` is written as `-1` and a negative input is read as `None`.
    pub q_value: Option<f64>,
    /// Peak column (presumably the summit offset relative to `start`, as in
    /// the ENCODE narrowPeak format; confirm against callers).
    pub peak: u64,
}
215
216impl BEDLike for NarrowPeak {
217 fn chrom(&self) -> &str { &self.chrom }
218 fn set_chrom(&mut self, chrom: &str) -> &mut Self {
219 self.chrom = chrom.to_string();
220 self
221 }
222 fn start(&self) -> u64 { self.start }
223 fn set_start(&mut self, start: u64) -> &mut Self {
224 self.start = start;
225 self
226 }
227 fn end(&self) -> u64 { self.end }
228 fn set_end(&mut self, end: u64) -> &mut Self {
229 self.end = end;
230 self
231 }
232 fn name(&self) -> Option<&str> { self.name.as_deref() }
233 fn score(&self) -> Option<Score> { self.score }
234 fn strand(&self) -> Option<Strand> { self.strand }
235}
236
237impl fmt::Display for NarrowPeak {
238 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
239 write!(
240 f,
241 "{}{}{}{}{}{}{}",
242 self.chrom(),
243 DELIMITER, self.start(),
244 DELIMITER, self.end(),
245 DELIMITER, self.name().unwrap_or(MISSING_ITEM),
246 )?;
247
248 f.write_char(DELIMITER)?;
249 if let Some(x) = self.score() {
250 write!(f, "{}", x)?;
251 } else {
252 f.write_str(MISSING_ITEM)?;
253 }
254 f.write_char(DELIMITER)?;
255 if let Some(x) = self.strand() {
256 write!(f, "{}", x)?;
257 } else {
258 f.write_str(MISSING_ITEM)?;
259 }
260 write!(
261 f,
262 "{}{}{}{}{}{}{}{}",
263 DELIMITER, self.signal_value,
264 DELIMITER, self.p_value.unwrap_or(-1.0),
265 DELIMITER, self.q_value.unwrap_or(-1.0),
266 DELIMITER, self.peak,
267 )?;
268
269 Ok(())
270 }
271}
272
273impl FromStr for NarrowPeak {
274 type Err = ParseError;
275
276 fn from_str(s: &str) -> Result<Self, Self::Err> {
277 let mut fields = s.split(DELIMITER);
278 Ok(Self {
279 chrom: parse_chrom(&mut fields)?.to_string(),
280 start: parse_start(&mut fields)?,
281 end: parse_end(&mut fields)?,
282 name: parse_name(&mut fields)?,
283 score: parse_score(&mut fields)?,
284 strand: parse_strand(&mut fields)?,
285 signal_value: fields.next().unwrap().parse().unwrap(),
286 p_value: parse_pvalue(&mut fields).unwrap(),
287 q_value: parse_pvalue(&mut fields).unwrap(),
288 peak: fields.next().unwrap().parse().unwrap(),
289 })
290 }
291}
292
/// A nine-column peak record: the six leading BED columns followed by
/// signal value, p-value and q-value.
#[derive(Encode, Decode, Clone, Debug, PartialEq)]
pub struct BroadPeak {
    /// Chromosome name.
    pub chrom: String,
    /// Start coordinate.
    pub start: u64,
    /// End coordinate.
    pub end: u64,
    /// Record name, `None` when absent.
    pub name: Option<String>,
    /// Record score, `None` when absent.
    pub score: Option<Score>,
    /// Strand, `None` when absent.
    pub strand: Option<Strand>,
    /// Signal value column.
    pub signal_value: f64,
    /// P-value column; `None` is written as `-1` and a negative input is read as `None`.
    pub p_value: Option<f64>,
    /// Q-value column; `None` is written as `-1` and a negative input is read as `None`.
    pub q_value: Option<f64>,
}
306
307impl BEDLike for BroadPeak {
308 fn chrom(&self) -> &str { &self.chrom }
309 fn set_chrom(&mut self, chrom: &str) -> &mut Self {
310 self.chrom = chrom.to_string();
311 self
312 }
313 fn start(&self) -> u64 { self.start }
314 fn set_start(&mut self, start: u64) -> &mut Self {
315 self.start = start;
316 self
317 }
318 fn end(&self) -> u64 { self.end }
319 fn set_end(&mut self, end: u64) -> &mut Self {
320 self.end = end;
321 self
322 }
323 fn name(&self) -> Option<&str> { self.name.as_deref() }
324 fn score(&self) -> Option<Score> { self.score }
325 fn strand(&self) -> Option<Strand> { self.strand }
326}
327
328impl fmt::Display for BroadPeak {
329 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
330 write!(
331 f,
332 "{}{}{}{}{}{}{}",
333 self.chrom(),
334 DELIMITER, self.start(),
335 DELIMITER, self.end(),
336 DELIMITER, self.name().unwrap_or(MISSING_ITEM),
337 )?;
338
339 f.write_char(DELIMITER)?;
340 if let Some(x) = self.score() {
341 write!(f, "{}", x)?;
342 } else {
343 f.write_str(MISSING_ITEM)?;
344 }
345 f.write_char(DELIMITER)?;
346 if let Some(x) = self.strand() {
347 write!(f, "{}", x)?;
348 } else {
349 f.write_str(MISSING_ITEM)?;
350 }
351 write!(
352 f,
353 "{}{}{}{}{}{}",
354 DELIMITER, self.signal_value,
355 DELIMITER, self.p_value.unwrap_or(-1.0),
356 DELIMITER, self.q_value.unwrap_or(-1.0),
357 )?;
358
359 Ok(())
360 }
361}
362
363impl FromStr for BroadPeak {
364 type Err = ParseError;
365
366 fn from_str(s: &str) -> Result<Self, Self::Err> {
367 let mut fields = s.split(DELIMITER);
368 Ok(Self {
369 chrom: parse_chrom(&mut fields)?.to_string(),
370 start: parse_start(&mut fields)?,
371 end: parse_end(&mut fields)?,
372 name: parse_name(&mut fields)?,
373 score: parse_score(&mut fields)?,
374 strand: parse_strand(&mut fields)?,
375 signal_value: fields.next().unwrap().parse().unwrap(),
376 p_value: parse_pvalue(&mut fields).unwrap(),
377 q_value: parse_pvalue(&mut fields).unwrap(),
378 })
379 }
380}
381
/// A genomic interval paired with a value of caller-chosen type `V`.
#[derive(Encode, Decode, Clone, Debug, PartialEq)]
pub struct BedGraph<V> {
    /// Chromosome name.
    pub chrom: String,
    /// Start coordinate.
    pub start: u64,
    /// End coordinate.
    pub end: u64,
    /// Value attached to the interval.
    pub value: V,
}
391
392impl<V> BedGraph<V> {
393 pub fn new<C>(chrom: C, start: u64, end: u64, value: V) -> Self
394 where
395 C: Into<String>,
396 { Self { chrom: chrom.into(), start, end, value } }
397
398 pub fn from_bed<B: BEDLike>(bed: &B, value: V) -> Self {
399 Self::new(bed.chrom(), bed.start(), bed.end(), value)
400 }
401}
402
403impl<V> BEDLike for BedGraph<V> {
404 fn chrom(&self) -> &str { &self.chrom }
405 fn set_chrom(&mut self, chrom: &str) -> &mut Self {
406 self.chrom = chrom.to_string();
407 self
408 }
409 fn start(&self) -> u64 { self.start }
410 fn set_start(&mut self, start: u64) -> &mut Self {
411 self.start = start;
412 self
413 }
414 fn end(&self) -> u64 { self.end }
415 fn set_end(&mut self, end: u64) -> &mut Self {
416 self.end = end;
417 self
418 }
419 fn name(&self) -> Option<&str> { None }
420 fn score(&self) -> Option<Score> { None }
421 fn strand(&self) -> Option<Strand> { None }
422}
423
424impl<V> fmt::Display for BedGraph<V>
425where
426 V: fmt::Display,
427{
428 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
429 {
430 write!(
431 f,
432 "{}{}{}{}{}{}{}",
433 self.chrom(),
434 DELIMITER, self.start(),
435 DELIMITER, self.end(),
436 DELIMITER, self.value,
437 )
438 }
439}
440
441impl<V> FromStr for BedGraph<V>
442where
443 V: FromStr,
444 <V as FromStr>::Err: std::fmt::Debug,
445{
446 type Err = ParseError;
447
448 fn from_str(s: &str) -> Result<Self, Self::Err>
449 {
450 let mut fields = s.split(DELIMITER);
451 Ok(Self {
452 chrom: parse_chrom(&mut fields)?.to_string(),
453 start: parse_start(&mut fields)?,
454 end: parse_end(&mut fields)?,
455 value: fields.next().unwrap().parse().unwrap(),
456 })
457 }
458}
459
460fn parse_chrom<'a, I>(fields: &mut I) -> Result<&'a str, ParseError>
461where
462 I: Iterator<Item = &'a str>,
463{
464 fields
465 .next()
466 .ok_or(ParseError::MissingReferenceSequenceName)
467}
468
469fn parse_start<'a, I>(fields: &mut I) -> Result<u64, ParseError>
470where
471 I: Iterator<Item = &'a str>,
472{
473 fields
474 .next()
475 .ok_or(ParseError::MissingStartPosition)
476 .and_then(|s| lexical::parse(s).map_err(ParseError::InvalidStartPosition))
477}
478
479fn parse_end<'a, I>(fields: &mut I) -> Result<u64, ParseError>
480where
481 I: Iterator<Item = &'a str>,
482{
483 fields
484 .next()
485 .ok_or(ParseError::MissingEndPosition)
486 .and_then(|s| lexical::parse(s).map_err(ParseError::InvalidEndPosition))
487}
488
489fn parse_name<'a, I>(fields: &mut I) -> Result<Option<String>, ParseError>
490where
491 I: Iterator<Item = &'a str>,
492{
493 fields
494 .next()
495 .ok_or(ParseError::MissingName)
496 .map(|s| match s {
497 MISSING_ITEM => None,
498 _ => Some(s.into()),
499 })
500}
501
502fn parse_score<'a, I>(fields: &mut I) -> Result<Option<Score>, ParseError>
503where
504 I: Iterator<Item = &'a str>,
505{
506 fields
507 .next()
508 .ok_or(ParseError::MissingScore)
509 .and_then(|s| match s {
510 MISSING_ITEM => Ok(None),
511 _ => s.parse().map(Some).map_err(ParseError::InvalidScore),
512 })
513}
514
515fn parse_strand<'a, I>(fields: &mut I) -> Result<Option<Strand>, ParseError>
516where
517 I: Iterator<Item = &'a str>,
518{
519 fields
520 .next()
521 .ok_or(ParseError::MissingStrand)
522 .and_then(|s| match s {
523 MISSING_ITEM => Ok(None),
524 _ => s.parse().map(Some).map_err(ParseError::InvalidStrand),
525 })
526}
527
528fn parse_pvalue<'a, I>(fields: &mut I) -> Result<Option<f64>, ParseError>
529where
530 I: Iterator<Item = &'a str>,
531{
532 fields
533 .next()
534 .ok_or(ParseError::MissingScore)
535 .and_then(|s| {
536 let p = s.parse().unwrap();
537 if p < 0.0 { Ok(None) } else { Ok(Some(p)) }
538 })
539}
540
541#[derive(Clone, Debug, Eq, PartialEq)]
543pub enum ParseError {
544 MissingReferenceSequenceName,
546 MissingStartPosition,
548 InvalidStartPosition(lexical::Error),
550 MissingEndPosition,
552 InvalidEndPosition(lexical::Error),
554 MissingName,
556 MissingScore,
558 InvalidScore(score::ParseError),
560 MissingStrand,
562 InvalidStrand(strand::ParseError),
564}
565
#[cfg(test)]
mod bed_tests {
    use super::*;

    /// Checks `OptionalFields` formatting and that every accepted separator
    /// combination parses to the same `GenomicRange`.
    #[test]
    fn test_fmt() {
        // OptionalFields: empty, one field, two fields.
        let empty = OptionalFields::default();
        assert_eq!(empty.to_string(), "");

        let single = OptionalFields::from(vec![String::from("n")]);
        assert_eq!(single.to_string(), "n");

        let pair = OptionalFields::from(vec![String::from("n"), String::from("d")]);
        assert_eq!(pair.to_string(), "n\td");

        // GenomicRange: tab, hyphen and colon separators are interchangeable.
        let expected = GenomicRange::new("chr1", 100, 200);
        let spellings = [
            "chr1\t100\t200",
            "chr1-100-200",
            "chr1:100-200",
            "chr1:100:200",
        ];
        for text in spellings.iter() {
            assert_eq!(expected, GenomicRange::from_str(text).unwrap());
        }

        // pretty_show output must parse back to the same value.
        let shown = expected.pretty_show();
        assert_eq!(expected, GenomicRange::from_str(&shown).unwrap());
    }
}