bcl2fq_stats/
lib.rs

1pub mod cli;
2pub mod distance;
3pub mod models;
4pub mod utils;
5
6use crate::cli::{Command, Parser};
7use crate::distance::hamming_distance;
8use crate::models::{Bcl2FqStats, ConversionResult};
9use crate::utils::sort_hashmap;
10use log::info;
11use models::UnknownBarcode;
12use serde_json::from_str;
13
14use std::collections::HashMap;
15use std::fs::File;
16use std::io::Read;
17
/// Tab-separated column header printed once, before any result rows.
const HEADER: &str = "sq_id\tbarcode\tread_count\tpossible_origin_index";
/// Upper bound on how many undetermined barcodes are printed.
const MAX_COUNT_UNDETERMINED: usize = 10;
20
21/// Parsing ConversionResult (per lane) from the bcl2fastq json file
22///
23/// # Arguments
24/// - `conversion_result`: ConversionResult object
25/// - `barcode_counter`: a hash map collecting the count for each sample
26/// - `barcode_list`: a hash map collecting the conversion between sample id and barcode
27///
28/// # Returns
29/// - None
30///
31/// # Example
32/// ```
33/// use bcl2fq_stats::models::{ConversionResult, DemuxResult};
34/// use std::collections::HashMap;
35/// ```
36fn collect_lane_barcode_count(
37    conversion_result: &ConversionResult,
38    barcode_counter: &mut HashMap<String, u64>,
39    barcode_list: &mut HashMap<String, String>,
40) -> Result<(), String> {
41    info!(
42        "Collecting sample barcode count from lane {}",
43        conversion_result.LaneNumber
44    );
45    for demux_sample in &conversion_result.DemuxResults {
46        let sample_id: &String = &demux_sample.SampleId;
47        let barcode: &String = &demux_sample.IndexMetrics[0].IndexSequence;
48        let read_count: u64 = demux_sample.NumberReads;
49
50        match barcode_counter.get(sample_id) {
51            Some(count) => {
52                barcode_counter.insert(sample_id.clone(), count + read_count);
53            }
54            None => {
55                barcode_counter.insert(sample_id.clone(), read_count);
56            }
57        };
58
59        barcode_list.insert(sample_id.clone(), barcode.to_string());
60    }
61    Ok(())
62}
63
64/// Parsing the Undetermined barcode section
65///
66/// # Arguments
67/// - unknown_barcode_lane: the unknown barcode object for each lane
68/// - undetermined_barcode_counter: a hashmap collecting the count for each undetermined barcodes
69fn collect_lane_undetermined_barcode(
70    unknown_barcode_lane: &UnknownBarcode,
71    undetermined_barcode_counter: &mut HashMap<String, u64>,
72) {
73    info!(
74        "Collecting undetermined barcode from lane {}",
75        unknown_barcode_lane.Lane
76    );
77    for (undetermined_barcode, read_count) in unknown_barcode_lane.Barcodes.iter() {
78        match undetermined_barcode_counter.get(undetermined_barcode) {
79            Some(count) => {
80                undetermined_barcode_counter
81                    .insert(undetermined_barcode.clone(), count + read_count);
82            }
83            None => {
84                undetermined_barcode_counter.insert(undetermined_barcode.clone(), *read_count);
85            }
86        };
87    }
88}
89
90/// Printing out the top count undetermined barcode, with their
91/// possible mismatched originated-barcode ID
92///
93/// # Arguments
94/// - `undetermined_barcode_counter`: a hashmap storing the undetermined barcode sequence and their
95/// counts
96/// - `barcode_list`: a barcode ID to barcode sequence hash map
97/// - `max_distance`: how many mismatch to tolerate before calling a barcode match
98fn print_undetermined_barcode(
99    undetermined_barcode_counter: &mut HashMap<String, u64>,
100    barcode_list: &HashMap<String, String>,
101    max_distance: &u8,
102) -> Result<(), String> {
103    let sorted_undetermined_barcode_count = sort_hashmap(undetermined_barcode_counter)?;
104
105    for it in sorted_undetermined_barcode_count.iter().enumerate() {
106        let (i, (undetermined_barcode, barcode_count)) = it;
107        let list_of_possible_barcodes: Vec<String> = barcode_list
108            .iter()
109            .map(|(barcode_id, barcode)| {
110                (
111                    barcode_id,
112                    hamming_distance(barcode.as_bytes(), undetermined_barcode.as_bytes()).unwrap(),
113                )
114            })
115            .filter(|(_barcode_id, score)| score < max_distance)
116            .map(|(barcode_id, _score)| barcode_id.clone())
117            .collect();
118        let possible_barcodes: String = list_of_possible_barcodes.join(",");
119
120        println!(
121            "Undetermined\t{}\t{}\t{}",
122            undetermined_barcode, barcode_count, possible_barcodes
123        );
124        if i == MAX_COUNT_UNDETERMINED - 1 {
125            return Ok(());
126        }
127    }
128    Ok(())
129}
130
131/// printing out the barcode count
132///
133/// # Arguments
134/// - `barcode_counter`: hash map of sample id/count
135/// - `barcode_list`: hash map of sample id/barcode
136fn print_barcode_count(
137    barcode_counter: &HashMap<String, u64>,
138    barcode_list: &HashMap<String, String>,
139) -> Result<(), String> {
140    // print out the barcode counts
141    let sorted_barcode_count = sort_hashmap(barcode_counter)?;
142    for (sample_id, barcode_count) in sorted_barcode_count.iter() {
143        let barcode: &String = barcode_list
144            .get(&(*sample_id).clone())
145            .ok_or(format!("No barcode collected for {}", sample_id))?;
146        println!("{}\t{}\t{}\t", sample_id, barcode, barcode_count);
147    }
148    Ok(())
149}
150
151pub fn run() -> Result<(), String> {
152    // Read in cli arguments
153    let args = Command::parse();
154
155    info!("Reading {}", &args.json_file);
156    // Read json file content as string
157    // TODO: can we stream it?
158    let mut file: File =
159        File::open(args.json_file).map_err(|_| "The given json file is not found".to_string())?;
160    let mut data: String = String::new();
161    file.read_to_string(&mut data).map_err(|e| e.to_string())?;
162    let bcl2fastq_stats: Bcl2FqStats =
163        from_str(&data).map_err(|_| "Is this a bcl2fastq Stats.json file?".to_string())?;
164
165    println!("{}", HEADER);
166    let mut barcode_list: HashMap<String, String> = HashMap::new();
167    let mut barcode_counter: HashMap<String, u64> = HashMap::new();
168
169    // parse demux result from all lanes
170    let _ = &bcl2fastq_stats
171        .ConversionResults
172        .into_iter()
173        .map(|conversion_result| {
174            collect_lane_barcode_count(&conversion_result, &mut barcode_counter, &mut barcode_list)
175        })
176        .collect::<Vec<Result<(), String>>>();
177
178    // and print out the demuxed counts
179    print_barcode_count(&barcode_counter, &barcode_list)?;
180
181    // now look at undetermined barcode section
182    let mut undetermined_barcode_counter: HashMap<String, u64> = HashMap::new();
183
184    // first collecting the counts
185    let _ = bcl2fastq_stats
186        .UnknownBarcodes
187        .iter()
188        .map(|unknown_barcode_for_lane| {
189            collect_lane_undetermined_barcode(
190                unknown_barcode_for_lane,
191                &mut undetermined_barcode_counter,
192            )
193        })
194        .collect::<Vec<_>>();
195
196    // and match with the known barcodes, and print them out
197    print_undetermined_barcode(
198        &mut undetermined_barcode_counter,
199        &barcode_list,
200        &args.max_distance,
201    )?;
202
203    Ok(())
204}