readfish_tools/lib.rs
1#![deny(missing_docs)]
2#![warn(clippy::missing_docs_in_private_items)]
3#![allow(dead_code)]
4//! # Readfish-tools
5//!
//! `readfish-tools` is a collection of utilities that provide a standardised way of analysing
//! completed readfish runs. Currently the accepted inputs are sequencing summary files,
//! a BAM of all produced FASTQ, and the `TOML` file that was used to configure the readfish run.
9//!
//! The intention is to demultiplex a BAM/PAF/sequencing summary into regions and barcodes, and then to provide
//! methods that report summary statistics for each of them.
12//!
13//! The crate is split into modules handling separate functionalities.
14//!
15//! ## Modules
16//! nanopore - Flowcell related functionality.
17//! channels - Channel Hashmaps for MinION and Flongle.
18//! paf - PAF related functionality.
19//! readfish - Readfish TOML related functionality.
20//! readfish_io - Custom functions and wrappers related IO functionality.
21//! sequencing_summary - Sequencing summary related functionality.
22mod channels;
23pub mod nanopore;
24pub mod paf;
25pub mod readfish;
26mod readfish_io;
27mod sequencing_summary;
28use std::{
29 cell::RefCell,
30 collections::HashMap,
31 fmt,
32 ops::Deref,
33 path::{Path, PathBuf},
34};
35
36use itertools::Itertools;
37use nanopore::{format_bases, running_mean};
38use num_format::{Locale, ToFormattedString};
39use paf::{Metadata, Paf, PafRecord, _parse_paf_line};
40use prettytable::{color, row, Attr, Cell, Row, Table};
41use pyo3::{prelude::*, types::PyIterator};
42use readfish::Conf;
43use readfish_io::DynResult;
44use sequencing_summary::SeqSum;
45
/// Mean read lengths for on-target, off-target, and all reads, each stored next to the
/// number of reads that contributed to it.
///
/// The means are maintained by [`MeanReadLengths::update_lengths`], which is called once per
/// PAF record.
#[derive(Debug)]
pub struct MeanReadLengths {
    /// The mean read length of on-target reads.
    pub on_target: isize,
    /// Number of on-target reads analysed (the sample count behind `on_target`).
    on_target_count: isize,
    /// The mean read length of off-target reads.
    pub off_target: isize,
    /// Number of off-target reads analysed (the sample count behind `off_target`).
    off_target_count: isize,
    /// The mean read length of all reads (on-target + off-target).
    pub total: isize,
    /// Number of reads analysed (the sample count behind `total`).
    total_count: isize,
}
62
63impl MeanReadLengths {
64 /// Creates a new `MeanReadLengths` instance with all fields initialized to 0.
65 pub fn new() -> Self {
66 MeanReadLengths {
67 on_target: 0,
68 on_target_count: 0,
69 off_target: 0,
70 off_target_count: 0,
71 total: 0,
72 total_count: 0,
73 }
74 }
75
76 /// Updates the mean read lengths for on-target, off-target, and total reads based on the provided
77 /// PAF record and whether the read is on-target or off-target.
78 ///
79 /// # Arguments
80 ///
81 /// * `paf` - A reference to the [`PafRecord`] representing the alignment record for a read.
82 /// * `on_target` - A boolean indicating whether the read is on-target (true) or off-target (false).
83 ///
84 /// # Example
85 ///
86 /// ```
87 /// use readfish_tools::{MeanReadLengths, paf::PafRecord};
88 /// let mut mean_lengths = MeanReadLengths::new();
89 /// let paf_record = PafRecord::new("read123 200 0 200 + contig123 300 0 300 200 200 50 ch=1".split(" ").collect()).unwrap();
90 /// mean_lengths.update_lengths(&paf_record, true);
91 /// ```
92 pub fn update_lengths(&mut self, paf: &PafRecord, on_target: bool) {
93 if on_target {
94 running_mean(
95 &mut self.on_target,
96 &mut self.on_target_count,
97 &mut (paf.query_length as isize),
98 );
99 } else {
100 running_mean(
101 &mut self.off_target,
102 &mut self.off_target_count,
103 &mut (paf.query_length as isize),
104 );
105 }
106 running_mean(
107 &mut self.total,
108 &mut self.total_count,
109 &mut (paf.query_length as isize),
110 );
111 }
112}
113
114impl Default for MeanReadLengths {
115 fn default() -> Self {
116 Self::new()
117 }
118}
119
/// Represents a summary of a single contig (reference sequence) from a sequencing experiment.
/// It includes the contig's name and length plus metrics about the reads mapped to it.
#[derive(Debug)]
pub struct ContigSummary {
    /// The name or identifier of the contig.
    pub name: String,
    /// The length of the contig in base pairs.
    pub length: usize,
    /// The mean read lengths (on-target, off-target and overall) of the reads mapped to this contig.
    pub mean_read_lengths: MeanReadLengths,
    /// The mean read quality of the mapped reads associated with this contig.
    pub mean_read_quality: f64,
    /// Yield (total bases) of the reads mapped to this contig.
    pub total_bases: usize,
    /// The N50 metric for this contig.
    /// NOTE(review): presumably the N50 of the lengths of reads mapped to this contig — confirm.
    pub n50: usize,
    /// The count of reads that are mapped on the target region (on-target reads).
    pub on_target_read_count: usize,
    /// The count of reads that are mapped off the target region (off-target reads).
    pub off_target_read_count: usize,
    /// The total yield (base pairs) of on-target reads for this contig.
    pub yield_on_target: usize,
    /// The total yield (base pairs) of off-target reads for this contig.
    pub yield_off_target: usize,
}
146impl ContigSummary {
147 /// Create a new `ContigSummary` instance with default values for all fields except `name` and `length`.
148 ///
149 /// # Arguments
150 ///
151 /// * `name` - The name of the contig.
152 /// * `length` - The length of the contig.
153 pub fn new(name: String, length: usize) -> Self {
154 ContigSummary {
155 name,
156 length,
157 mean_read_lengths: MeanReadLengths::new(),
158 mean_read_quality: 0.0,
159 total_bases: 0,
160 n50: 0,
161 on_target_read_count: 0,
162 off_target_read_count: 0,
163 yield_on_target: 0,
164 yield_off_target: 0,
165 }
166 }
167 /// Get the total number of reads on the contig.
168 pub fn total_reads(&self) -> usize {
169 self.on_target_read_count + self.off_target_read_count
170 }
171
172 /// Mean read length of all reads on the contig.
173 pub fn mean_read_length(&self) -> usize {
174 self.mean_read_lengths.total as usize
175 }
176 /// On target mean read length of all reads on the contig.
177 pub fn on_target_mean_read_length(&self) -> usize {
178 self.mean_read_lengths.on_target as usize
179 }
180 /// Off target mean read length of all reads on the contig.
181 pub fn off_target_mean_read_length(&self) -> usize {
182 self.mean_read_lengths.off_target as usize
183 }
184}
#[derive(Debug)]
/// Represents a summary of the sequencing data for one condition, including read counts,
/// yields, mean read lengths and a per-contig breakdown.
pub struct ConditionSummary {
    /// The name or identifier of the condition.
    pub name: String,
    /// The total number of reads recorded for this condition.
    pub total_reads: usize,
    /// Mean read lengths (on-target, off-target and overall).
    pub mean_read_lengths: MeanReadLengths,
    /// The count of reads that are mapped off the target regions (off-target reads).
    pub off_target_read_count: usize,
    /// The count of reads that are mapped to the target regions (on-target reads).
    pub on_target_read_count: usize,
    /// The percentage (0-100) of off-target reads in the sequencing data.
    pub off_target_percent: f64,
    /// The total yield (base pairs) of off-target reads in the sequencing data.
    pub off_target_yield: usize,
    /// The total yield (base pairs) of on-target reads in the sequencing data.
    pub on_target_yield: usize,
    /// The mean read quality of off-target reads.
    pub off_target_mean_read_quality: f64,
    /// The mean read quality of on-target reads.
    pub on_target_mean_read_quality: f64,
    /// The N50 metric for the entire dataset.
    /// NOTE(review): presumably a read-length N50 over all reads of the condition — confirm.
    pub n50: usize,
    /// The N50 metric for on-target reads.
    /// NOTE(review): presumably a read-length N50 over the on-target reads — confirm.
    pub on_target_n50: usize,
    /// The N50 metric for off-target reads.
    /// NOTE(review): presumably a read-length N50 over the off-target reads — confirm.
    pub off_target_n50: usize,
    /// Map from contig name to the `ContigSummary` of that contig, for the contigs seen in
    /// this condition.
    pub contigs: HashMap<String, ContigSummary>,
}
221
222impl fmt::Display for ConditionSummary {
223 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
224 writeln!(f, "Condition Name: {}", self.name)?;
225 writeln!(f, "Total Reads: {}", self.total_reads)?;
226 writeln!(f, "Off-Target Read Count: {}", self.off_target_read_count)?;
227 writeln!(f, "On-Target Read Count: {}", self.on_target_read_count)?;
228 writeln!(f, "Off-Target Percent: {:.2}%", self.off_target_percent)?;
229 writeln!(f, "Off-Target Yield: {}", self.off_target_yield)?;
230 writeln!(f, "On-Target Yield: {}", self.on_target_yield)?;
231 writeln!(
232 f,
233 "Off-Target Mean Read Length: {}",
234 self.off_target_mean_read_length()
235 )?;
236 writeln!(
237 f,
238 "On-Target Mean Read Length: {}",
239 self.on_target_mean_read_length()
240 )?;
241 // writeln!(
242 // f,
243 // "Off-Target Mean Read Quality: {:.2}",
244 // self.off_target_mean_read_quality
245 // )?;
246 // writeln!(
247 // f,
248 // "On-Target Mean Read Quality: {:.2}",
249 // self.on_target_mean_read_quality
250 // )?;
251 // writeln!(f, "N50: {}", self.n50)?;
252 // writeln!(f, "On-Target N50: {}", self.on_target_n50)?;
253 // writeln!(f, "Off-Target N50: {}", self.off_target_n50)?;
254
255 writeln!(f, "Contigs:")?;
256 for (contig_name, contig_summary) in &self.contigs {
257 writeln!(f, " Contig Name: {}", contig_name)?;
258 writeln!(f, " Length: {}", contig_summary.length)?;
259 // Print other fields from ContigSummary here
260 // For example:
261 // writeln!(f, " Contig Mean Read Length: {}", contig_summary.mean_read_length)?;
262 }
263 Ok(())
264 }
265}
266
267impl ConditionSummary {
268 /// Update the `ConditionSummary` with information from the provided `PafRecord`.
269 ///
270 /// This method updates the fields of the `ConditionSummary` based on the information
271 /// from the given `PafRecord`. It increments the appropriate read counts (on-target
272 /// or off-target), calculates the mean read lengths and read qualities, updates the
273 /// total reads count, and calculates the off-target percentage.
274 ///
275 /// # Arguments
276 ///
277 /// * `paf` - The [`PafRecord`] containing the information about the alignment.
278 /// * `on_target` - A boolean flag indicating whether the alignment is on-target or off-target.
279 ///
280 /// # Returns
281 ///
282 /// This function returns a [`DynResult`] (a dynamic result that can contain any error).
283 /// If the operation is successful, the `DynResult` will hold an `Ok(())`. Otherwise, it
284 /// will hold an `Err` containing a helpful error message.
285 pub fn update(&mut self, paf: PafRecord, on_target: bool) -> DynResult<()> {
286 // update the condition struct
287 self.total_reads += 1;
288 self.mean_read_lengths.update_lengths(&paf, on_target);
289 if on_target {
290 self.on_target_read_count += 1;
291 self.on_target_yield += paf.query_length;
292 // self.on_target_mean_read_quality += paf.tlen as f64;
293 } else {
294 self.off_target_read_count += 1;
295 self.off_target_yield += paf.query_length;
296 // self.off_target_mean_read_quality += paf.tlen as f64;
297 }
298 self.off_target_percent =
299 self.off_target_read_count as f64 / self.total_reads as f64 * 100.0;
300 let contig = self.get_or_add_contig(&paf.target_name, paf.target_length);
301 contig.total_bases += paf.query_length;
302 contig.mean_read_lengths.update_lengths(&paf, on_target);
303 if on_target {
304 contig.on_target_read_count += 1;
305 // self.on_target_mean_read_quality += paf.tlen as f64;
306 } else {
307 contig.off_target_read_count += 1;
308 contig.yield_off_target += paf.target_length;
309 // self.off_target_mean_read_quality += paf.tlen as f64;
310 }
311 // contig.mean_read_quality = paf.tlen;
312 // contig.n50 = paf.tlen;
313 // contig.on_target_read_count = paf.tlen;
314 // contig.off_target_read_count = paf.tlen;
315
316 Ok(())
317 }
318 /// Create a new `Summary` instance with default values for all fields except `name`.
319 ///
320 /// # Arguments
321 ///
322 /// * `name` - The name of the summary.
323 pub fn new(name: String) -> Self {
324 ConditionSummary {
325 name,
326 total_reads: 0,
327 off_target_read_count: 0,
328 on_target_read_count: 0,
329 off_target_percent: 0.0,
330 off_target_yield: 0,
331 on_target_yield: 0,
332 mean_read_lengths: MeanReadLengths::new(),
333 off_target_mean_read_quality: 0.0,
334 on_target_mean_read_quality: 0.0,
335 n50: 0,
336 on_target_n50: 0,
337 off_target_n50: 0,
338 contigs: HashMap::new(),
339 }
340 }
341
342 /// Get the name or identifier of the sequencing data.
343 pub fn name(&self) -> &str {
344 &self.name
345 }
346
347 /// Set the name or identifier of the sequencing data.
348 pub fn set_name(&mut self, name: String) {
349 self.name = name;
350 }
351
352 /// Get the total number of reads in the sequencing data.
353 pub fn total_reads(&self) -> usize {
354 self.total_reads
355 }
356
357 /// Set the total number of reads in the sequencing data.
358 pub fn add_total_reads(&mut self, total_reads: usize) {
359 self.total_reads += total_reads;
360 }
361
362 /// Get the count of reads that are mapped off the target regions (off-target reads).
363 pub fn off_target_read_count(&self) -> usize {
364 self.off_target_read_count
365 }
366
367 /// Set the count of reads that are mapped off the target regions (off-target reads).
368 pub fn set_off_target_read_count(&mut self, off_target_read_count: usize) {
369 self.off_target_read_count = off_target_read_count;
370 }
371
372 /// Get the count of reads that are mapped to the target regions (on-target reads).
373 pub fn on_target_read_count(&self) -> usize {
374 self.on_target_read_count
375 }
376
377 /// Set the count of reads that are mapped to the target regions (on-target reads).
378 pub fn set_on_target_read_count(&mut self, on_target_read_count: usize) {
379 self.on_target_read_count = on_target_read_count;
380 }
381
382 /// Get the percentage of off-target reads in the sequencing data.
383 pub fn off_target_percent(&self) -> f64 {
384 self.off_target_percent
385 }
386
387 /// Set the percentage of off-target reads in the sequencing data.
388 pub fn set_off_target_percent(&mut self, off_target_percent: f64) {
389 self.off_target_percent = off_target_percent;
390 }
391
392 /// Get the total yield (base pairs) of off-target reads in the sequencing data.
393 pub fn off_target_yield(&self) -> usize {
394 self.off_target_yield
395 }
396
397 /// Set the total yield (base pairs) of off-target reads in the sequencing data.
398 pub fn set_off_target_yield(&mut self, off_target_yield: usize) {
399 self.off_target_yield = off_target_yield;
400 }
401
402 /// Get the total yield (base pairs) of on-target reads in the sequencing data.
403 pub fn on_target_yield(&self) -> usize {
404 self.on_target_yield
405 }
406
407 /// Set the total yield (base pairs) of on-target reads in the sequencing data.
408 pub fn set_on_target_yield(&mut self, on_target_yield: usize) {
409 self.on_target_yield = on_target_yield;
410 }
411 /// Get the mean read length of all reads
412 pub fn mean_read_length(&self) -> usize {
413 self.mean_read_lengths.total as usize
414 }
415
416 /// Get the mean read length of off-target reads.
417 pub fn off_target_mean_read_length(&self) -> usize {
418 self.mean_read_lengths.off_target as usize
419 }
420
421 /// Get the mean read length of on-target reads.
422 pub fn on_target_mean_read_length(&self) -> usize {
423 self.mean_read_lengths.on_target as usize
424 }
425
426 /// Get the mean read quality of off-target reads.
427 pub fn off_target_mean_read_quality(&self) -> f64 {
428 self.off_target_mean_read_quality
429 }
430
431 /// Set the mean read quality of off-target reads.
432 pub fn set_off_target_mean_read_quality(&mut self, off_target_mean_read_quality: f64) {
433 self.off_target_mean_read_quality = off_target_mean_read_quality;
434 }
435
436 /// Get the mean read quality of on-target reads.
437 pub fn on_target_mean_read_quality(&self) -> f64 {
438 self.on_target_mean_read_quality
439 }
440
441 /// Set the mean read quality of on-target reads.
442 pub fn set_on_target_mean_read_quality(&mut self, on_target_mean_read_quality: f64) {
443 self.on_target_mean_read_quality = on_target_mean_read_quality;
444 }
445
446 /// Get the N50 metric for the entire dataset.
447 pub fn n50(&self) -> usize {
448 self.n50
449 }
450
451 /// Set the N50 metric for the entire dataset.
452 pub fn set_n50(&mut self, n50: usize) {
453 self.n50 = n50;
454 }
455
456 /// Get the N50 metric for on-target reads.
457 pub fn on_target_n50(&self) -> usize {
458 self.on_target_n50
459 }
460
461 /// Set the N50 metric for on-target reads.
462 pub fn set_on_target_n50(&mut self, on_target_n50: usize) {
463 self.on_target_n50 = on_target_n50;
464 }
465
466 /// Get the N50 metric for off-target reads.
467 pub fn off_target_n50(&self) -> usize {
468 self.off_target_n50
469 }
470
471 /// Set the N50 metric for off-target reads.
472 pub fn set_off_target_n50(&mut self, off_target_n50: usize) {
473 self.off_target_n50 = off_target_n50;
474 }
475
476 /// Get a reference to the vector of `ContigSummary`.
477 pub fn contigs(&self) -> &HashMap<String, ContigSummary> {
478 &self.contigs
479 }
480
481 /// Get a mutable reference to the vector of `ContigSummary`.
482 pub fn contigs_mut(&mut self) -> &mut HashMap<String, ContigSummary> {
483 &mut self.contigs
484 }
485
486 /// Get the ContigSummary associated with the given contig name or
487 /// add a new ContigSummary with the specified name and length if it doesn't exist.
488 ///
489 /// # Arguments
490 ///
491 /// * `contig` - The name of the contig.
492 /// * `length` - The length of the contig.
493 ///
494 /// # Returns
495 ///
496 /// A reference to the ContigSummary associated with the contig name.
497 ///
498 /// # Example
499 ///
500 /// ```rust,ignore
501 /// use std::collections::HashMap;
502 /// use your_crate::ContigSummary;
503 ///
504 /// let mut contig_map: HashMap<String, ContigSummary> = HashMap::new();
505 ///
506 /// // Get an existing contig or add a new one if it doesn't exist
507 /// let contig_name = "chr1".to_string();
508 /// let contig_length = 1000;
509 /// let contig_summary = contig_map.get_or_add_contig(contig_name.clone(), contig_length);
510 ///
511 /// // Now you can modify the ContigSummary fields or access its properties
512 /// println!("Contig name: {}", contig_summary.name);
513 /// println!("Contig length: {}", contig_summary.length);
514 /// ```
515 pub fn get_or_add_contig(&mut self, contig: &str, length: usize) -> &mut ContigSummary {
516 self.contigs
517 .entry(contig.to_string())
518 .or_insert(ContigSummary::new(contig.to_string(), length))
519 }
520
521 /// get the total yield
522 pub fn total_yield(&self) -> usize {
523 self.on_target_yield + self.off_target_yield
524 }
525}
526
/// A summary of all conditions in a readfish run.
///
/// The `Summary` struct wraps a hashmap in which each key is the name of a condition and the
/// corresponding value is the [`ConditionSummary`] holding the summary information for that
/// condition.
///
/// # Fields
///
/// * `conditions` - A hashmap containing the summary information for each condition. The key is a string representing the name of the condition,
/// and the value is a `ConditionSummary` struct containing the summary information for that condition.
///
/// # Examples
///
/// ```rust, ignore
/// use std::collections::HashMap;
/// use readfish_tools::{Summary, ConditionSummary};
///
/// // Create a new Summary
/// let mut summary = Summary {
///     conditions: HashMap::new(),
/// };
///
/// // Add some condition summaries
/// summary.conditions.insert(
///     "ConditionA".to_string(),
///     ConditionSummary::new("ConditionA".to_string()),
/// );
///
/// summary.conditions.insert(
///     "ConditionB".to_string(),
///     ConditionSummary::new("ConditionB".to_string()),
/// );
///
/// // Access a specific condition summary
/// if let Some(condition_summary) = summary.conditions.get("ConditionA") {
///     println!("Summary for ConditionA: {:?}", condition_summary);
/// }
/// ```
#[derive(Debug)]
pub struct Summary {
    /// Per-condition summaries (a condition being a region or barcode), keyed by condition name.
    pub conditions: HashMap<String, ConditionSummary>,
}
573
574impl fmt::Display for Summary {
575 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
576 // Todo rewrite to use Macro!
577 let mut condition_table = Table::new();
578 condition_table.add_row(Row::new(vec![
579 Cell::new("Condition")
580 .with_style(Attr::Bold)
581 .with_style(Attr::ForegroundColor(color::GREEN)),
582 Cell::new("Total reads")
583 .with_style(Attr::Bold)
584 .with_style(Attr::ForegroundColor(color::GREEN)),
585 Cell::new("# Off-target \nreads")
586 .with_style(Attr::Bold)
587 .with_style(Attr::ForegroundColor(color::GREEN)),
588 Cell::new("# On-target \nreads")
589 .with_style(Attr::Bold)
590 .with_style(Attr::ForegroundColor(color::GREEN)),
591 Cell::new("Total Yield")
592 .with_style(Attr::Bold)
593 .with_style(Attr::ForegroundColor(color::GREEN)),
594 Cell::new("Off Target\n Yield")
595 .with_style(Attr::Bold)
596 .with_style(Attr::ForegroundColor(color::GREEN)),
597 Cell::new("On Target\n yield")
598 .with_style(Attr::Bold)
599 .with_style(Attr::ForegroundColor(color::GREEN)),
600 Cell::new("Mean read\n length")
601 .with_style(Attr::Bold)
602 .with_style(Attr::ForegroundColor(color::GREEN)),
603 Cell::new("On target\nMean read\n length")
604 .with_style(Attr::Bold)
605 .with_style(Attr::ForegroundColor(color::GREEN)),
606 Cell::new("Off target\nMean read\n length")
607 .with_style(Attr::Bold)
608 .with_style(Attr::ForegroundColor(color::GREEN)),
609 ]));
610 for (condition_name, condition_summary) in &self.conditions {
611 condition_table.add_row(Row::new(vec![
612 Cell::new(condition_name).with_style(Attr::ForegroundColor(color::BRIGHT_YELLOW)),
613 // total reads
614 Cell::new(
615 &condition_summary
616 .total_reads
617 .to_formatted_string(&Locale::en),
618 )
619 .with_style(Attr::ForegroundColor(color::GREEN)),
620 // off target reads
621 Cell::new(&format!(
622 "{} ({:.2}%)",
623 condition_summary
624 .off_target_read_count
625 .to_formatted_string(&Locale::en),
626 condition_summary.off_target_percent
627 ))
628 .with_style(Attr::ForegroundColor(color::GREEN)),
629 // on target reads
630 Cell::new(&format!(
631 "{} ({:.2}%)",
632 condition_summary
633 .on_target_read_count
634 .to_formatted_string(&Locale::en),
635 100_f64 - condition_summary.off_target_percent
636 ))
637 .with_style(Attr::ForegroundColor(color::GREEN)),
638 // total yield
639 Cell::new(&format_bases(condition_summary.total_yield()))
640 .with_style(Attr::ForegroundColor(color::GREEN)),
641 // on target yield
642 Cell::new(&format_bases(condition_summary.off_target_yield))
643 .with_style(Attr::ForegroundColor(color::GREEN)),
644 // on target yield
645 Cell::new(&format_bases(condition_summary.on_target_yield))
646 .with_style(Attr::ForegroundColor(color::GREEN)),
647 // mean read length
648 Cell::new(&format_bases(condition_summary.mean_read_length()))
649 .with_style(Attr::ForegroundColor(color::GREEN)),
650 // on target mean read length
651 Cell::new(&format_bases(
652 condition_summary.on_target_mean_read_length(),
653 ))
654 .with_style(Attr::ForegroundColor(color::GREEN)),
655 // off target mean read length
656 Cell::new(&format_bases(
657 condition_summary.off_target_mean_read_length(),
658 ))
659 .with_style(Attr::ForegroundColor(color::GREEN)),
660 ]));
661
662 // writeln!(
663 // f,
664 // " Off-Target Mean Read Quality: {:.2}",
665 // condition_summary.off_target_mean_read_quality
666 // )?;
667 // writeln!(
668 // f,
669 // " On-Target Mean Read Quality: {:.2}",
670 // condition_summary.on_target_mean_read_quality
671 // )?;
672 // writeln!(f, " N50: {}", condition_summary.n50)?;
673 // writeln!(f, " On-Target N50: {}", condition_summary.on_target_n50)?;
674 // writeln!(f, " Off-Target N50: {}", condition_summary.off_target_n50)?;
675 }
676 condition_table.printstd();
677 writeln!(f, "Contigs:")?;
678
679 for condition_summary in self.conditions.values() {
680 let mut contig_table = Table::new();
681 contig_table.add_row(row![bFg->"Condition Name", BriH2->&condition_summary.name]);
682 // Create a custom format with left-leading spaces
683 contig_table.get_format();
684 contig_table.add_row(Row::new(vec![
685 Cell::new("Contig")
686 .with_style(Attr::Bold)
687 .with_style(Attr::ForegroundColor(color::GREEN)),
688 Cell::new("Contig Length")
689 .with_style(Attr::Bold)
690 .with_style(Attr::ForegroundColor(color::GREEN)),
691 Cell::new("Read count")
692 .with_style(Attr::Bold)
693 .with_style(Attr::ForegroundColor(color::GREEN)),
694 Cell::new("Yield")
695 .with_style(Attr::Bold)
696 .with_style(Attr::ForegroundColor(color::GREEN)),
697 Cell::new("Mean \nRead Length")
698 .with_style(Attr::Bold)
699 .with_style(Attr::ForegroundColor(color::GREEN)),
700 Cell::new("On Target\n Reads")
701 .with_style(Attr::Bold)
702 .with_style(Attr::ForegroundColor(color::GREEN)),
703 Cell::new("Off \nTarget Reads")
704 .with_style(Attr::Bold)
705 .with_style(Attr::ForegroundColor(color::GREEN)),
706 Cell::new("Mean read\n length")
707 .with_style(Attr::Bold)
708 .with_style(Attr::ForegroundColor(color::GREEN)),
709 Cell::new("On target\nMean read\n length")
710 .with_style(Attr::Bold)
711 .with_style(Attr::ForegroundColor(color::GREEN)),
712 Cell::new("Off target\nMean read\n length")
713 .with_style(Attr::Bold)
714 .with_style(Attr::ForegroundColor(color::GREEN)),
715 ]));
716 for (contig_name, contig_summary) in condition_summary
717 .contigs
718 .iter()
719 .sorted_by(|(key1, _), (key2, _)| natord::compare(key1, key2))
720 {
721 contig_table.add_row(Row::new(vec![
722 Cell::new(contig_name)
723 .with_style(Attr::Bold)
724 .with_style(Attr::ForegroundColor(color::GREEN)),
725 Cell::new(&contig_summary.length.to_formatted_string(&Locale::en))
726 .with_style(Attr::ForegroundColor(color::GREEN)),
727 Cell::new(
728 &contig_summary
729 .total_reads()
730 .to_formatted_string(&Locale::en),
731 )
732 .with_style(Attr::ForegroundColor(color::GREEN)),
733 Cell::new(&format_bases(contig_summary.total_bases))
734 .with_style(Attr::ForegroundColor(color::GREEN)),
735 Cell::new(&format_bases(contig_summary.mean_read_length()))
736 .with_style(Attr::ForegroundColor(color::GREEN)),
737 Cell::new(
738 &contig_summary
739 .on_target_read_count
740 .to_formatted_string(&Locale::en),
741 )
742 .with_style(Attr::ForegroundColor(color::GREEN)),
743 Cell::new(
744 &contig_summary
745 .off_target_read_count
746 .to_formatted_string(&Locale::en),
747 )
748 .with_style(Attr::ForegroundColor(color::GREEN)),
749 //mean read length
750 Cell::new(&format_bases(contig_summary.mean_read_length()))
751 .with_style(Attr::ForegroundColor(color::GREEN)),
752 // on target mean read length
753 Cell::new(&format_bases(contig_summary.on_target_mean_read_length()))
754 .with_style(Attr::ForegroundColor(color::GREEN)),
755 // off target mean read length
756 Cell::new(&format_bases(contig_summary.off_target_mean_read_length()))
757 .with_style(Attr::ForegroundColor(color::GREEN)),
758 ]));
759 // Print other fields from ContigSummary here
760 // For example:
761 // writeln!(f, " Contig Mean Read Length: {}", contig_summary.mean_read_length)?;
762 }
763 contig_table.printstd();
764 }
765 Ok(())
766 }
767}
768
769impl Summary {
770 /// Create a new `Summary` instance with default values for all fields.
771 fn new() -> Self {
772 Summary {
773 conditions: HashMap::new(),
774 }
775 }
776
777 /// Get the summary for the specified condition. If the condition does not exist in the
778 /// `Summary`, it will be created with default values.
779 ///
780 /// # Arguments
781 ///
782 /// * `condition_name`: A `String` representing the name or identifier of the condition.
783 ///
784 /// # Returns
785 ///
786 /// A reference to the `ConditionSummary` for the specified condition.
787 ///
788 /// # Example
789 ///
790 /// ```rust,ignore
791 /// use std::collections::HashMap;
792 ///
793 /// let mut summary = Summary::new();
794 ///
795 /// // Get or add the condition with the name "Condition A"
796 /// let condition_a = summary.conditions("Condition A".to_string());
797 ///
798 /// // Modify the fields of the condition summary
799 /// condition_a.set_total_reads(10000);
800 /// condition_a.set_on_target_read_count(8000);
801 /// condition_a.set_off_target_read_count(2000);
802 /// // ...
803 ///
804 /// // Get or add another condition
805 /// let condition_b = summary.conditions("Condition B".to_string());
806 /// // ...
807 /// ```
808 pub fn conditions<T: Deref<Target = str>>(
809 &mut self,
810 condition_name: T,
811 ) -> &mut ConditionSummary {
812 self.conditions
813 .entry(condition_name.to_string())
814 .or_insert(ConditionSummary::new(condition_name.to_string()))
815 }
816}
817/// Demultiplex PAF records based on the specified configuration.
818///
819/// This function takes two file paths as inputs, `toml_path` and `paf_path`, representing
820/// the paths to the TOML configuration file and the PAF file, respectively. The TOML configuration
821/// is read using the `readfish::Conf::from_file` function, and the PAF file is opened and checked using the
822/// `paf::open_paf_for_reading` function. The resulting PAF records are then demultiplexed based on the
823/// information provided in the configuration file.
824///
825/// Note: The current implementation initializes a new `paf::Paf` object with a hardcoded PAF file
826/// path "resources/test_paf_With_seq_sum.paf" and calls its `demultiplex` method with the parsed
827/// TOML configuration. However, the line is commented out, so the actual demultiplexing process
828/// is not performed. Please ensure that the proper PAF object is used and uncommented to perform
829/// the demultiplexing.
830///
831/// If there are barcodes present in the Conf TOML file, and the barcode_arrangement column is missing from the
832/// the sequencing summary file, the function will panic.
833///
834/// # Arguments
835///
836/// * `toml_path`: The file path to the TOML configuration file.
837/// * `paf_path`: The file path to the PAF file to be demultiplexed.
838///
839/// # Examples
840///
841/// ```rust,ignore
842/// use std::path::Path;
843/// demultiplex_paf("config.toml", "file.paf");
844/// ```
845///
846pub fn _demultiplex_paf(
847 toml_path: impl AsRef<Path>,
848 paf_path: impl AsRef<Path>,
849 sequencing_summary_path: Option<impl AsRef<Path>>,
850 print_summary: bool,
851 _csv_out: Option<impl AsRef<Path>>,
852) {
853 let toml_path = toml_path.as_ref();
854 let paf_path = paf_path.as_ref();
855 let mut toml = readfish::Conf::from_file(toml_path);
856 let mut paf = paf::Paf::new(paf_path);
857 let seq_sum =
858 sequencing_summary_path.map(|path| sequencing_summary::SeqSum::from_file(path).unwrap());
859 let mut seq_sum = seq_sum;
860 let mut summary = Summary::new();
861 paf.demultiplex(&mut toml, seq_sum.as_mut(), Some(&mut summary))
862 .unwrap();
863 if print_summary {
864 println!("{}", summary);
865 }
866}
867
868// PYTHON PyO3 STuff below ////////////////////////
869
#[pyclass]
/// Organise the data and methods for analysing a readfish PAF file.
pub struct ReadfishSummary {
    /// Stores the aggregated summary numbers for the readfish run.
    summary: RefCell<Summary>,
    /// The config TOML file for the readfish run (`None` until one has been set).
    _conf: Option<Conf>,
    /// The sequencing summary file (`None` until one has been set).
    _sequencing_summary: Option<SeqSum>,
    /// The PAF file (`None` until one has been set).
    _paf_file: Option<Paf>,
}
882
883impl Default for ReadfishSummary {
884 fn default() -> Self {
885 ReadfishSummary::new()
886 }
887}
888
889impl ReadfishSummary {
890 /// Creates a new instance of `ReadfishSummary` with default values.
891 ///
892 /// This function initializes a new `ReadfishSummary` struct with default values
893 /// for all fields. The `summary` field will be initialized with an empty `Summary`
894 /// instance. The `_conf`, `_sequencing_summary`, and `_paf_file` fields will be set
895 /// to `None`, indicating that they have not been initialized with specific values yet.
896 ///
897 /// # Returns
898 ///
899 /// A new `ReadfishSummary` instance with default values.
900 ///
901 /// # Examples
902 ///
903 /// ```
904 /// use readfish_tools::ReadfishSummary;
905 ///
906 /// let summary = ReadfishSummary::new();
907 /// assert_eq!(summary.has_conf(), false); // _conf field is not set yet
908 /// assert_eq!(summary.has_sequencing_summary(), false); // _sequencing_summary field is not set yet
909 /// assert_eq!(summary.has_paf_file(), false); // _paf_file field is not set yet
910 /// ```
911 pub fn new() -> Self {
912 ReadfishSummary {
913 summary: RefCell::new(Summary::new()),
914 _conf: None,
915 _sequencing_summary: None,
916 _paf_file: None,
917 }
918 }
919 /// Update a condition on the summary
920 // pub fn update_condition(
921 // &mut self,
922 // condition_name: &str,
923 // paf_record: PafRecord,
924 // on_target: bool,
925 // ) {
926 // let condition_summary = self.summary.borrow_mut().conditions(condition_name);
927 // condition_summary.update(paf_record, on_target).unwrap();
928 // }
929
930 /// Get the config TOML file for the readfish run.
931 pub fn conf(&self) -> Option<&Conf> {
932 self._conf.as_ref()
933 }
934
935 /// Set the config TOML file for the readfish run.
936 pub fn set_conf(&mut self, conf: Conf) {
937 self._conf = Some(conf);
938 }
939
940 /// Parse a path to a config file and add it to the ReadfishSummary
941 pub fn parse_conf_file(&mut self, conf_path: PathBuf) {
942 let conf = Conf::from_file(conf_path);
943 self._conf = Some(conf);
944 }
945
946 /// Get the sequencing summary file.
947 pub fn sequencing_summary(&self) -> Option<&SeqSum> {
948 self._sequencing_summary.as_ref()
949 }
950
951 /// Set the sequencing summary file.
952 pub fn set_sequencing_summary(&mut self, sequencing_summary: SeqSum) {
953 self._sequencing_summary = Some(sequencing_summary);
954 }
955
956 /// Parse a provided sequencing summary file path and set it on the summary
957 pub fn parse_sequencing_summary(&mut self, seq_sum_path: PathBuf) {
958 let sequencing_summary = SeqSum::from_file(seq_sum_path).unwrap();
959 self.set_sequencing_summary(sequencing_summary);
960 }
961
962 /// Get the PAF file.
963 pub fn paf_file(&self) -> Option<&Paf> {
964 self._paf_file.as_ref()
965 }
966
967 /// Set the PAF file.
968 pub fn set_paf_file(&mut self, paf_file: Paf) {
969 self._paf_file = Some(paf_file);
970 }
971
972 /// Check if the config TOML file is set.
973 pub fn has_conf(&self) -> bool {
974 self._conf.is_some()
975 }
976
977 /// Check if the sequencing summary file is set.
978 pub fn has_sequencing_summary(&self) -> bool {
979 self._sequencing_summary.is_some()
980 }
981
982 /// Check if the PAF file is set.
983 pub fn has_paf_file(&self) -> bool {
984 self._paf_file.is_some()
985 }
986}
987
988/// Implements methods for interacting with a ReadfishSummary instance from Python.
989#[pymethods]
990impl ReadfishSummary {
991 /// Creates a new instance of ReadfishSummary with default values.
992 /// Returns:
993 /// A new ReadfishSummary instance.
994 #[new]
995 #[pyo3(signature = ())]
996 fn py_new() -> PyResult<Self> {
997 Ok(ReadfishSummary::default())
998 }
999 /// Adds a PAF record to the ReadfishSummary.
1000 /// This method is not implemented and will always return an error.
1001 /// Returns:
1002 /// An error indicating that the method is not implemented.
1003 fn add_paf_record(&self) -> PyResult<()> {
1004 unimplemented!()
1005 }
1006
1007 /// Parses the provided sequencing summary file and sets it on the ReadfishSummary.
1008 /// Args:
1009 /// seq_sum_path: PathBuf - The path to the sequencing summary file to be parsed.
1010 /// Returns:
1011 /// Ok(()) if successful, or an error if the file could not be parsed or set on the summary.
1012 fn with_sequencing_summary(&mut self, seq_sum_path: PathBuf) -> PyResult<()> {
1013 self.parse_sequencing_summary(seq_sum_path);
1014 Ok(())
1015 }
1016
1017 /// Parses the provided TOML configuration file and sets it on the ReadfishSummary.
1018 /// Args:
1019 /// toml_path: PathBuf - The path to the TOML configuration file to be parsed.
1020 /// Returns:
1021 /// Ok(()) if successful, or an error if the file could not be parsed or set on the summary.
1022 fn with_toml_conf(&mut self, toml_path: PathBuf) -> PyResult<()> {
1023 self.parse_conf_file(toml_path);
1024 Ok(())
1025 }
1026
1027 /// Parses PAF lines from a Python iterator and updates the ReadfishSummary accordingly.
1028 ///
1029 /// This method takes a Python iterator that provides PAF lines as strings. It iterates over the lines,
1030 /// parses each line to extract relevant information, and updates the ReadfishSummary based on the
1031 /// extracted data. The extracted metadata is used to make decisions and update the internal state of
1032 /// the ReadfishSummary. Finishes by printing a summary of the parsed PAF files to stdout.
1033 ///
1034 /// # Arguments
1035 ///
1036 /// * `iter`: A Python iterator that provides PAF lines as strings.
1037 ///
1038 /// # Returns
1039 ///
1040 /// A `PyResult` indicating success or an error encountered during parsing.
1041 ///
1042 /// # Examples
1043 ///
1044 /// ```rust,ignore
1045 /// # use pyo3::types::PyIterator;
1046 /// # use readfish_tools::{ReadfishSummary, paf::Metadata, paf::_parse_paf_line};
1047 ///
1048 /// // Assuming we have valid inputs
1049 /// let mut readfish_summary = ReadfishSummary::default();
1050 /// let paf_lines: Vec<String> = vec![
1051 /// "read123 200 0 200 + contig123 300 0 300 200 200 50 ch=1".to_string(),
1052 /// "read456 150 0 150 - contig456 200 0 200 150 150 45 ch=2 ba=sampleB".to_string(),
1053 /// // Add more PAF lines as needed
1054 /// ];
1055 /// let py_iter = PyIterator::new(paf_lines.into_iter());
1056 ///
1057 /// let result = readfish_summary.parse_paf_from_iter(&py_iter);
1058 ///
1059 /// assert!(result.is_ok());
1060 /// ```
1061 fn parse_paf_from_iter(&mut self, iter: &PyIterator) -> PyResult<()> {
1062 let conf = self.conf().unwrap();
1063 for paf_line in iter {
1064 let paf_line = paf_line?;
1065 let (paf_line, meta_tuple): (String, (String, usize, Option<String>)) =
1066 paf_line.extract()?;
1067 let mut meta_data: Metadata = meta_tuple.into();
1068 let (paf_record, on_target, condition_name) =
1069 _parse_paf_line(paf_line, conf, Some(&mut meta_data), None).unwrap();
1070 {
1071 let mut x = self.summary.borrow_mut();
1072 let y = x.conditions(condition_name.as_str());
1073 y.update(paf_record, on_target).unwrap();
1074 }
1075 }
1076 Ok(())
1077 }
1078 /// Prints the summary of the `ReadfishSummary` to the standard output.
1079 ///
1080 /// This method borrows the `ReadfishSummary` immutably and prints its summary to the standard output.
1081 /// The summary is obtained by calling the `borrow` method on the `RefCell<Summary>` field of the
1082 /// `ReadfishSummary`.
1083 ///
1084 /// # Returns
1085 ///
1086 /// This function returns a `PyResult<()>` to indicate success or failure. If the summary is
1087 /// successfully printed, `Ok(())` is returned. If an error occurs during printing, an appropriate
1088 /// `PyErr` will be set, and `Err` will be returned.
1089 ///
1090 /// # Examples
1091 ///
1092 /// ```rust,ignore
1093 /// # use pyo3::prelude::*;
1094 /// # use std::cell::RefCell;
1095 /// # use std::collections::HashMap;
1096 /// # use std::path::PathBuf;
1097 /// # use std::error::Error;
1098 ///
1099 /// # #[pyclass]
1100 /// # pub struct ReadfishSummary {
1101 /// # // Fields of ReadfishSummary
1102 /// # // ...
1103 /// # }
1104 /// #
1105 /// # #[pymethods]
1106 /// # impl ReadfishSummary {
1107 /// # #[getter]
1108 /// # pub fn summary(&self) -> PyResult<Ref<Summary>> {
1109 /// # unimplemented!()
1110 /// # }
1111 /// #
1112 /// /// Method to print the summary of ReadfishSummary.
1113 /// pub fn print_summary(&self) -> PyResult<()> {
1114 /// println!("{}", self.summary.borrow());
1115 /// Ok(())
1116 /// }
1117 /// # }
1118 /// ```
1119 ///
1120 /// The method can be called on an instance of `ReadfishSummary` to print its summary.
1121 ///
1122 /// ```rust,ignore
1123 /// # use pyo3::prelude::*;
1124 /// # use std::cell::RefCell;
1125 /// # use std::collections::HashMap;
1126 /// # use std::path::PathBuf;
1127 /// # use std::error::Error;
1128 /// #
1129 /// # #[pyclass]
1130 /// # pub struct ReadfishSummary {
1131 /// # // Fields of ReadfishSummary
1132 /// # // ...
1133 /// # }
1134 /// #
1135 /// # #[pymethods]
1136 /// # impl ReadfishSummary {
1137 /// # #[getter]
1138 /// # pub fn summary(&self) -> PyResult<Ref<Summary>> {
1139 /// # unimplemented!()
1140 /// # }
1141 /// #
1142 /// # /// Method to print the summary of ReadfishSummary.
1143 /// # pub fn print_summary(&self) -> PyResult<()> {
1144 /// # println!("{}", self.summary.borrow());
1145 /// # Ok(())
1146 /// # }
1147 /// # }
1148 /// #
1149 /// # fn main() -> PyResult<()> {
1150 /// # Python::with_gil(|py| {
1151 /// # let gil = Python::acquire_gil();
1152 /// # let py = gil.python();
1153 /// #
1154 /// # // Create an instance of ReadfishSummary and call the print_summary method
1155 /// # let readfish_summary = ReadfishSummary { /* Initialize fields... */ };
1156 /// # readfish_summary.print_summary()?;
1157 /// # Ok(())
1158 /// # })
1159 /// # }
1160 /// ```
1161 pub fn print_summary(&self) -> PyResult<()> {
1162 println!("{}", self.summary.borrow());
1163 Ok(())
1164 }
1165}
1166
1167/// Formats the sum of two numbers as string.
1168#[pyfunction]
1169fn summarise_from_iter(
1170 toml_path: PathBuf,
1171 paf_path: PathBuf,
1172 seq_sum_path: PathBuf,
1173) -> PyResult<()> {
1174 _demultiplex_paf(
1175 toml_path,
1176 paf_path,
1177 Some(seq_sum_path),
1178 true,
1179 None::<String>,
1180 );
1181 Ok(())
1182}
1183
1184/// Summarizes the results of demultiplexing a PAF file using the provided TOML file and
1185/// prints the summary to stdout.
1186///
1187/// # Arguments
1188///
1189/// * `toml_path` - The path to the TOML file containing configuration settings for a readfish experiment.
1190/// * `paf_path` - The path to the PAF file containing alignment results.
1191/// * `seq_sum_path` - The sequencing summary file produced by ONTs guppy.
1192///
1193/// # Returns
1194///
1195/// This function returns a `PyResult<()>`, indicating success or an error encountered during processing.
1196///
1197/// # Panics
1198///
1199/// This function may panic if there are any critical errors during demultiplexing or file operations.
1200/// It's recommended to handle the possible panics in the calling code if necessary.
1201#[pyfunction]
1202fn summarise_paf(toml_path: PathBuf, paf_path: PathBuf, seq_sum_path: PathBuf) -> PyResult<()> {
1203 _demultiplex_paf(
1204 toml_path,
1205 paf_path,
1206 Some(seq_sum_path),
1207 true,
1208 None::<String>,
1209 );
1210 Ok(())
1211}
1212
1213/// A Python module implemented in Rust.
1214#[pymodule]
1215fn readfish_tools(_py: Python, m: &PyModule) -> PyResult<()> {
1216 m.add_function(wrap_pyfunction!(summarise_paf, m)?)?;
1217 m.add_class::<ReadfishSummary>()?;
1218 Ok(())
1219}
1220
#[cfg(test)]
mod tests {
    use super::*;
    use crate::readfish::Region;
    use std::path::PathBuf;

    /// Path of the `resources` directory under the crate manifest directory.
    fn get_resource_dir() -> PathBuf {
        PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("resources")
    }

    /// Path of the named fixture inside the `resources` directory.
    fn get_test_file(file: &str) -> PathBuf {
        get_resource_dir().join(file)
    }

    /// Builds a `PafRecord` from a space-separated PAF line, panicking if it is rejected.
    fn paf_record(line: &str) -> PafRecord {
        PafRecord::new(line.split(' ').collect()).unwrap()
    }

    #[test]
    fn test_update_lengths() {
        // Record whose second field (the length under test) is 100.
        let record_100 = paf_record("read123 100 0 100 + contig123 300 0 300 200 200 50 ch=1");
        let mut lengths = MeanReadLengths::new();

        // A fresh instance reports zero for every mean.
        assert_eq!(
            (lengths.on_target, lengths.off_target, lengths.total),
            (0, 0, 0)
        );

        // One on-target read: only the on-target and total means move.
        lengths.update_lengths(&record_100, true);
        assert_eq!(
            (lengths.on_target, lengths.off_target, lengths.total),
            (100, 0, 100)
        );

        // The same read counted off-target: the off-target mean catches up.
        lengths.update_lengths(&record_100, false);
        assert_eq!(
            (lengths.on_target, lengths.off_target, lengths.total),
            (100, 100, 100)
        );

        // A second off-target read, this time with a length of 150.
        let record_150 = paf_record("read123 150 0 100 + contig123 300 0 300 200 200 50 ch=1");
        lengths.update_lengths(&record_150, false);
        assert_eq!(
            (lengths.on_target, lengths.off_target, lengths.total),
            (100, 125, 116)
        );
    }

    #[test]
    fn test_parse_sequencing_summary() {
        let mut readfish_summary = ReadfishSummary::new();

        // Parse the fixture and attach it to the summary.
        readfish_summary.parse_sequencing_summary(get_test_file("seq_sum_PAK09329.txt"));
        assert!(readfish_summary.has_sequencing_summary());

        // The parsed sequencing summary exposes the expected number of buffered records.
        let parsed = readfish_summary.sequencing_summary().unwrap();
        assert_eq!(parsed.record_buffer.len(), 100_000_usize);
    }

    #[test]
    fn test_parse_conf_file() {
        let mut readfish_summary = ReadfishSummary::new();

        // Parse the fixture config and attach it to the summary.
        readfish_summary.parse_conf_file(get_test_file("RAPID_CNS2.toml"));

        // Only the config is set; the sequencing summary stays untouched.
        assert!(readfish_summary.has_conf());
        assert!(!readfish_summary.has_sequencing_summary());

        let conf = readfish_summary.conf().unwrap();
        let (control, condition) = conf.get_conditions(1, None::<String>).unwrap();
        assert!(!control);

        let region = condition.any().downcast_ref::<Region>().unwrap();
        let settings = &region.condition;
        assert_eq!(settings.name, "Direct_CNS");
        assert_eq!(settings.min_chunks, 1);
        assert_eq!(settings.max_chunks, 4);

        assert_eq!(settings.single_off, "unblock".into());
        assert_eq!(settings.multi_off, "unblock".into());
        assert_eq!(settings.single_on, "stop_receiving".into());
        assert_eq!(settings.multi_on, "stop_receiving".into());
        assert_eq!(settings.no_seq, "proceed".into());
        assert_eq!(settings.no_map, "proceed".into());
    }
}