use crate::qc::{FilteringStats, QcStats};
use serde::Serialize;
use std::io::Write;
/// Options controlling how a [`JsonReport`] is serialized.
#[derive(Debug, Clone)]
pub struct JsonConfig {
    /// Emit pretty-printed (indented) JSON instead of compact output.
    pub pretty: bool,
    /// Include timing information in the report.
    /// NOTE(review): not consulted by `write_json_report_with_config` in this
    /// file — confirm where (or whether) it is honored.
    pub include_timing: bool,
}
impl Default for JsonConfig {
fn default() -> Self {
Self {
pretty: true,
include_timing: true,
}
}
}
impl JsonConfig {
    /// Create a config with the default settings (see [`Default`]).
    pub fn new() -> Self {
        Default::default()
    }

    /// Builder: enable or disable pretty-printing.
    pub fn with_pretty(self, pretty: bool) -> Self {
        Self { pretty, ..self }
    }

    /// Builder: enable or disable timing information.
    pub fn with_timing(self, include: bool) -> Self {
        Self {
            include_timing: include,
            ..self
        }
    }
}
/// Top-level JSON report. `Option` fields are omitted from the serialized
/// output when `None`, so single-end runs produce no `read2_*` keys.
#[derive(Debug, Clone, Serialize)]
pub struct JsonReport {
    pub summary: Summary,
    pub read1_before_filtering: ReadStats,
    pub read1_after_filtering: ReadStats,
    /// R2 statistics; present only for paired-end data.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub read2_before_filtering: Option<ReadStats>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub read2_after_filtering: Option<ReadStats>,
    pub filtering_result: FilteringResult,
    pub duplication: DuplicationInfo,
    /// Present only when adapter trimming was performed.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub adapter_cutting: Option<AdapterStats>,
    /// Present only when insert-size estimation produced data.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub insert_size: Option<InsertSizeInfo>,
    /// Present only when overlap-based base correction ran.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub correction: Option<CorrectionInfo>,
    /// The command line that produced this report.
    pub command: String,
}
/// Report header: tool version, sequencing layout, and aggregate
/// before/after-filtering totals.
#[derive(Debug, Clone, Serialize)]
pub struct Summary {
    pub fastars_version: String,
    /// Either "single end" or "paired end".
    pub sequencing_type: String,
    pub before_filtering: BeforeAfterSummary,
    pub after_filtering: BeforeAfterSummary,
}
/// Aggregate totals for one side (before or after) of the filtering step.
#[derive(Debug, Clone, Serialize)]
pub struct BeforeAfterSummary {
    pub total_reads: u64,
    pub total_bases: u64,
    pub q20_bases: u64,
    pub q30_bases: u64,
    /// Fraction of bases at Q20+ in [0, 1].
    pub q20_rate: f64,
    /// Fraction of bases at Q30+ in [0, 1].
    pub q30_rate: f64,
    pub read1_mean_length: f64,
    /// Only populated for paired-end data; omitted from JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub read2_mean_length: Option<f64>,
    /// GC fraction in [0, 1].
    pub gc_content: f64,
}
/// Detailed per-read-stream statistics: totals, per-position curves,
/// histograms, and overrepresented sequences.
#[derive(Debug, Clone, Serialize)]
pub struct ReadStats {
    pub total_reads: u64,
    pub total_bases: u64,
    pub q20_bases: u64,
    pub q30_bases: u64,
    pub mean_quality: f64,
    pub mean_length: f64,
    /// GC fraction in [0, 1].
    pub gc_content: f64,
    /// Mean quality per read position.
    pub quality_curve: Vec<f64>,
    /// Per-position base composition (percentages).
    pub content_curves: ContentCurves,
    pub quality_histogram: Vec<u64>,
    pub gc_histogram: Vec<u64>,
    pub length_histogram: LengthHistogram,
    /// Omitted from JSON when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub overrepresented_sequences: Vec<OverrepresentedSeq>,
}
/// Per-position base-content curves, serialized with uppercase base keys
/// ("A", "T", "G", "C", "N") plus a combined "gc" curve. Values are
/// percentages (see `ReadStats::from_qc_stats`).
#[derive(Debug, Clone, Serialize)]
pub struct ContentCurves {
    #[serde(rename = "A")]
    pub a: Vec<f64>,
    #[serde(rename = "T")]
    pub t: Vec<f64>,
    #[serde(rename = "G")]
    pub g: Vec<f64>,
    #[serde(rename = "C")]
    pub c: Vec<f64>,
    #[serde(rename = "N")]
    pub n: Vec<f64>,
    pub gc: Vec<f64>,
}
/// Read-length summary statistics plus the full length distribution.
#[derive(Debug, Clone, Serialize)]
pub struct LengthHistogram {
    pub min: usize,
    pub max: usize,
    pub mean: f64,
    pub median: usize,
    /// One bin per distinct observed length.
    pub distribution: Vec<LengthBin>,
}
/// One bin of the read-length distribution: how many reads had this length.
#[derive(Debug, Clone, Serialize)]
pub struct LengthBin {
    pub length: usize,
    pub count: u64,
}
/// A sequence observed more often than expected.
#[derive(Debug, Clone, Serialize)]
pub struct OverrepresentedSeq {
    pub sequence: String,
    pub count: u64,
    /// Occurrence as a percentage of total reads.
    pub percentage: f64,
    /// Possible origin (e.g. a known adapter); omitted from JSON when `None`.
    /// Currently always `None` in `ReadStats::from_qc_stats`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub possible_source: Option<String>,
}
/// Per-reason read filtering counters. The constructors in this file only
/// populate `passed_filter_reads` and `low_quality_reads`; the remaining
/// counters default to 0.
#[derive(Debug, Clone, Default, Serialize)]
pub struct FilteringResult {
    pub passed_filter_reads: u64,
    pub low_quality_reads: u64,
    pub too_many_n_reads: u64,
    pub too_short_reads: u64,
    pub too_long_reads: u64,
    pub low_complexity_reads: u64,
}
/// Duplication rate plus a histogram of duplication levels.
#[derive(Debug, Clone, Serialize)]
pub struct DuplicationInfo {
    pub rate: f64,
    pub histogram: DuplicationHistogram,
}
/// Counts of sequences seen exactly N times, serialized with numeric string
/// keys "1" through "9" and a final "10+" overflow bucket.
#[derive(Debug, Clone, Serialize)]
pub struct DuplicationHistogram {
    #[serde(rename = "1")]
    pub level_1: u64,
    #[serde(rename = "2")]
    pub level_2: u64,
    #[serde(rename = "3")]
    pub level_3: u64,
    #[serde(rename = "4")]
    pub level_4: u64,
    #[serde(rename = "5")]
    pub level_5: u64,
    #[serde(rename = "6")]
    pub level_6: u64,
    #[serde(rename = "7")]
    pub level_7: u64,
    #[serde(rename = "8")]
    pub level_8: u64,
    #[serde(rename = "9")]
    pub level_9: u64,
    #[serde(rename = "10+")]
    pub level_10_plus: u64,
}
/// Adapter trimming summary. Adapter sequences are omitted from JSON when
/// not detected/configured.
#[derive(Debug, Clone, Serialize)]
pub struct AdapterStats {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub adapter_r1: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub adapter_r2: Option<String>,
    pub reads_with_adapter: u64,
    pub bases_trimmed: u64,
}
/// Insert-size estimation summary for paired-end overlap analysis.
#[derive(Debug, Clone, Serialize)]
pub struct InsertSizeInfo {
    /// Most frequent insert size.
    pub peak: usize,
    pub mean: f64,
    pub std_dev: f64,
    pub median: usize,
    /// Number of pairs with a measurable insert size.
    pub count: u64,
    /// Fraction of pairs for which the insert size could be determined.
    pub detection_rate: f64,
    pub histogram: Vec<u64>,
}
/// Overlap-based base correction summary for paired-end reads.
#[derive(Debug, Clone, Default, Serialize)]
pub struct CorrectionInfo {
    pub pairs_processed: u64,
    pub pairs_with_overlap: u64,
    pub pairs_corrected: u64,
    /// Total corrected bases (R1 + R2).
    pub bases_corrected: u64,
    pub bases_corrected_r1: u64,
    pub bases_corrected_r2: u64,
    pub overlap_rate: f64,
    pub correction_rate: f64,
}
impl CorrectionInfo {
    /// Build the report representation from the correction module's
    /// accumulated statistics (straight field copy plus derived rates).
    pub fn from_stats(stats: &crate::correction::CorrectionStats) -> Self {
        Self {
            pairs_processed: stats.pairs_processed,
            pairs_with_overlap: stats.pairs_with_overlap,
            pairs_corrected: stats.pairs_corrected,
            bases_corrected: stats.bases_corrected,
            bases_corrected_r1: stats.bases_corrected_r1,
            bases_corrected_r2: stats.bases_corrected_r2,
            overlap_rate: stats.overlap_rate(),
            correction_rate: stats.correction_rate(),
        }
    }
}
impl InsertSizeInfo {
    /// Build the report representation from the QC module's insert-size
    /// statistics. The histogram is copied so the report owns its data.
    pub fn from_stats(stats: &crate::qc::InsertSizeStats) -> Self {
        Self {
            peak: stats.peak(),
            mean: stats.mean(),
            std_dev: stats.std_dev(),
            median: stats.median(),
            count: stats.count(),
            detection_rate: stats.detection_rate(),
            histogram: stats.histogram().to_vec(),
        }
    }
}
/// Throughput/timing metrics.
/// NOTE(review): declared but not referenced by `JsonReport` in this file —
/// presumably intended for the `JsonConfig::include_timing` option; confirm
/// whether it should be wired into the report.
#[derive(Debug, Clone, Serialize)]
pub struct TimingInfo {
    pub total_seconds: f64,
    pub reads_per_second: f64,
    pub bases_per_second: f64,
}
impl JsonReport {
    /// Build a single-end report from before/after filtering statistics.
    ///
    /// Dropped reads (`before - after`) are attributed to the low-quality
    /// bucket for consistency with [`JsonReport::from_qc_stats_pair`];
    /// per-reason counters are not available from `FilteringStats`.
    /// (The previous version also carried an always-false `is_paired` flag,
    /// making the paired-end branch dead code — removed.)
    pub fn from_filtering_stats(
        filtering_stats: &FilteringStats,
        command: String,
    ) -> Self {
        let before = &filtering_stats.before;
        let after = &filtering_stats.after;
        Self {
            summary: Summary {
                fastars_version: env!("CARGO_PKG_VERSION").to_string(),
                // This constructor only ever produces single-end reports;
                // callers upgrade via `with_read2_stats`.
                sequencing_type: "single end".to_string(),
                before_filtering: BeforeAfterSummary::from_stats(before),
                after_filtering: BeforeAfterSummary::from_stats(after),
            },
            read1_before_filtering: ReadStats::from_qc_stats(before),
            read1_after_filtering: ReadStats::from_qc_stats(after),
            read2_before_filtering: None,
            read2_after_filtering: None,
            filtering_result: FilteringResult {
                passed_filter_reads: after.total_reads,
                low_quality_reads: before.total_reads.saturating_sub(after.total_reads),
                too_many_n_reads: 0,
                too_short_reads: 0,
                too_long_reads: 0,
                low_complexity_reads: 0,
            },
            duplication: DuplicationInfo::from_stats(after),
            adapter_cutting: None,
            insert_size: after.insert_size().map(InsertSizeInfo::from_stats),
            correction: None,
            command,
        }
    }

    /// Build a single-end report from one `QcStats` snapshot. No filtering
    /// was applied, so the same stats populate both "before" and "after".
    pub fn from_qc_stats(stats: &QcStats, command: String) -> Self {
        Self {
            summary: Summary {
                fastars_version: env!("CARGO_PKG_VERSION").to_string(),
                sequencing_type: "single end".to_string(),
                before_filtering: BeforeAfterSummary::from_stats(stats),
                after_filtering: BeforeAfterSummary::from_stats(stats),
            },
            read1_before_filtering: ReadStats::from_qc_stats(stats),
            read1_after_filtering: ReadStats::from_qc_stats(stats),
            read2_before_filtering: None,
            read2_after_filtering: None,
            filtering_result: FilteringResult::default(),
            duplication: DuplicationInfo::from_stats(stats),
            adapter_cutting: None,
            insert_size: stats.insert_size().map(InsertSizeInfo::from_stats),
            correction: None,
            command,
        }
    }

    /// Build a report from distinct before/after snapshots.
    ///
    /// When `is_paired` is set, R1 and R2 sections are both derived from the
    /// same combined stats (per-stream stats are not available here), and
    /// `read2_mean_length` mirrors `read1_mean_length`.
    pub fn from_qc_stats_pair(
        before: &QcStats,
        after: &QcStats,
        is_paired: bool,
        command: String,
    ) -> Self {
        let sequencing_type = if is_paired { "paired end" } else { "single end" };
        let mut summary_before = BeforeAfterSummary::from_stats(before);
        let mut summary_after = BeforeAfterSummary::from_stats(after);
        if is_paired {
            summary_before.read2_mean_length = Some(summary_before.read1_mean_length);
            summary_after.read2_mean_length = Some(summary_after.read1_mean_length);
        }
        Self {
            summary: Summary {
                fastars_version: env!("CARGO_PKG_VERSION").to_string(),
                sequencing_type: sequencing_type.to_string(),
                before_filtering: summary_before,
                after_filtering: summary_after,
            },
            read1_before_filtering: ReadStats::from_qc_stats(before),
            read1_after_filtering: ReadStats::from_qc_stats(after),
            read2_before_filtering: if is_paired {
                Some(ReadStats::from_qc_stats(before))
            } else {
                None
            },
            read2_after_filtering: if is_paired {
                Some(ReadStats::from_qc_stats(after))
            } else {
                None
            },
            filtering_result: FilteringResult {
                passed_filter_reads: after.total_reads,
                // All removed reads are attributed to the low-quality bucket;
                // per-reason counters are not tracked at this level.
                low_quality_reads: before.total_reads.saturating_sub(after.total_reads),
                too_many_n_reads: 0,
                too_short_reads: 0,
                too_long_reads: 0,
                low_complexity_reads: 0,
            },
            duplication: DuplicationInfo::from_stats(after),
            adapter_cutting: None,
            insert_size: after.insert_size().map(InsertSizeInfo::from_stats),
            correction: None,
            command,
        }
    }

    /// Builder: attach adapter-trimming statistics.
    pub fn with_adapter_stats(mut self, adapter_stats: AdapterStats) -> Self {
        self.adapter_cutting = Some(adapter_stats);
        self
    }

    /// Builder: attach base-correction statistics.
    pub fn with_correction(mut self, correction: CorrectionInfo) -> Self {
        self.correction = Some(correction);
        self
    }

    /// Builder: attach R2 statistics and mark the report as paired-end.
    pub fn with_read2_stats(
        mut self,
        before: ReadStats,
        after: ReadStats,
    ) -> Self {
        self.read2_before_filtering = Some(before);
        self.read2_after_filtering = Some(after);
        self.summary.sequencing_type = "paired end".to_string();
        self
    }

    /// Builder: attach insert-size statistics.
    pub fn with_insert_size(mut self, insert_size: InsertSizeInfo) -> Self {
        self.insert_size = Some(insert_size);
        self
    }
}
impl BeforeAfterSummary {
    /// Condense a `QcStats` snapshot into aggregate totals and rates.
    ///
    /// Rates are fractions in [0, 1]. Q20/Q30 base counts are reconstructed
    /// from the rates, so they are rounded estimates rather than exact
    /// tallies. `read2_mean_length` is left unset; the paired-end
    /// constructor fills it in.
    pub fn from_stats(stats: &QcStats) -> Self {
        let total = stats.total_bases;
        let q20 = stats.q20_percent() / 100.0;
        let q30 = stats.q30_percent() / 100.0;
        Self {
            total_reads: stats.total_reads,
            total_bases: total,
            q20_bases: (total as f64 * q20) as u64,
            q30_bases: (total as f64 * q30) as u64,
            q20_rate: q20,
            q30_rate: q30,
            read1_mean_length: stats.mean_length(),
            read2_mean_length: None,
            gc_content: stats.mean_gc() / 100.0,
        }
    }
}
impl ReadStats {
    /// Convert accumulated `QcStats` into the per-read-stream JSON
    /// representation: totals, per-position curves, histograms, and
    /// overrepresented sequences.
    pub fn from_qc_stats(stats: &QcStats) -> Self {
        let q20_rate = stats.q20_percent() / 100.0;
        let q30_rate = stats.q30_percent() / 100.0;
        let q20_bases = (stats.total_bases as f64 * q20_rate) as u64;
        let q30_bases = (stats.total_bases as f64 * q30_rate) as u64;

        // Mean quality per position; positions with no observations yield 0.0.
        let quality_curve: Vec<f64> = stats
            .quality
            .position_stats()
            .iter()
            .map(|(sum, count)| {
                if *count == 0 {
                    0.0
                } else {
                    *sum as f64 / *count as f64
                }
            })
            .collect();

        // Per-position base composition as percentages; GC is the combined
        // G + C fraction.
        let positions = stats.base_content.len();
        let mut a_curve = Vec::with_capacity(positions);
        let mut t_curve = Vec::with_capacity(positions);
        let mut g_curve = Vec::with_capacity(positions);
        let mut c_curve = Vec::with_capacity(positions);
        let mut n_curve = Vec::with_capacity(positions);
        let mut gc_curve = Vec::with_capacity(positions);
        for pos in 0..positions {
            let ratios = stats.base_content.get_ratios(pos);
            a_curve.push(ratios[0] * 100.0);
            t_curve.push(ratios[1] * 100.0);
            g_curve.push(ratios[2] * 100.0);
            c_curve.push(ratios[3] * 100.0);
            n_curve.push(ratios[4] * 100.0);
            gc_curve.push((ratios[2] + ratios[3]) * 100.0);
        }

        // One bin per distinct observed read length.
        let length_dist: Vec<LengthBin> = stats
            .length
            .distribution()
            .iter()
            .map(|(&len, &count)| LengthBin { length: len, count })
            .collect();

        // Keep sequences occurring in >= 0.1% of reads. `filter_map` computes
        // the percentage once per candidate (the previous version computed it
        // twice: once in `filter`, again in `map`). `max(1)` guards against
        // division by zero on empty input.
        let denom = stats.total_reads.max(1) as f64;
        let overrep: Vec<OverrepresentedSeq> = stats
            .kmer
            .top_overrepresented()
            .into_iter()
            .filter_map(|(seq, count)| {
                let percentage = (count as f64 / denom) * 100.0;
                if percentage >= 0.1 {
                    Some(OverrepresentedSeq {
                        sequence: String::from_utf8_lossy(&seq).to_string(),
                        count,
                        percentage,
                        possible_source: None,
                    })
                } else {
                    None
                }
            })
            .collect();

        Self {
            total_reads: stats.total_reads,
            total_bases: stats.total_bases,
            q20_bases,
            q30_bases,
            mean_quality: stats.mean_quality(),
            mean_length: stats.mean_length(),
            gc_content: stats.mean_gc() / 100.0,
            quality_curve,
            content_curves: ContentCurves {
                a: a_curve,
                t: t_curve,
                g: g_curve,
                c: c_curve,
                n: n_curve,
                gc: gc_curve,
            },
            quality_histogram: stats.quality.histogram().to_vec(),
            gc_histogram: stats.gc.histogram().to_vec(),
            length_histogram: LengthHistogram {
                min: stats.length.min_length(),
                max: stats.length.max_length(),
                mean: stats.mean_length(),
                median: stats.length.median_length(),
                distribution: length_dist,
            },
            overrepresented_sequences: overrep,
        }
    }
}
impl DuplicationInfo {
    /// Summarize the duplication rate plus a 10-bucket duplication-level
    /// histogram (levels 1-9 and a "10+" overflow bucket).
    pub fn from_stats(stats: &QcStats) -> Self {
        // NOTE(review): assumes `histogram_snapshot()` yields at least 10
        // entries — indexing panics otherwise; confirm against the QC module.
        let hist = stats.duplication.histogram_snapshot();
        Self {
            rate: stats.duplication_rate(),
            histogram: DuplicationHistogram {
                level_1: hist[0],
                level_2: hist[1],
                level_3: hist[2],
                level_4: hist[3],
                level_5: hist[4],
                level_6: hist[5],
                level_7: hist[6],
                level_8: hist[7],
                level_9: hist[8],
                level_10_plus: hist[9],
            },
        }
    }
}
/// Serialize `stats` as a pretty-printed JSON report with an empty command
/// string. Use `write_json_report_with_config` to control formatting.
pub fn write_json_report<W: Write>(stats: &QcStats, writer: &mut W) -> anyhow::Result<()> {
    let report = JsonReport::from_qc_stats(stats, String::new());
    Ok(serde_json::to_writer_pretty(writer, &report)?)
}
/// Serialize `stats` as a JSON report, honoring `config.pretty`.
/// `config.include_timing` is not consulted here.
pub fn write_json_report_with_config<W: Write>(
    stats: &QcStats,
    config: &JsonConfig,
    writer: &mut W,
) -> anyhow::Result<()> {
    let report = JsonReport::from_qc_stats(stats, String::new());
    match config.pretty {
        true => serde_json::to_writer_pretty(writer, &report)?,
        false => serde_json::to_writer(writer, &report)?,
    }
    Ok(())
}
/// Serialize before/after filtering statistics as a pretty-printed JSON
/// report, recording the originating command line.
pub fn write_filtering_json_report<W: Write>(
    filtering_stats: &FilteringStats,
    command: &str,
    writer: &mut W,
) -> anyhow::Result<()> {
    let report = JsonReport::from_filtering_stats(filtering_stats, command.to_owned());
    Ok(serde_json::to_writer_pretty(writer, &report)?)
}
/// Serialize an already-built report, honoring `config.pretty`.
pub fn write_full_json_report<W: Write>(
    report: &JsonReport,
    config: &JsonConfig,
    writer: &mut W,
) -> anyhow::Result<()> {
    match config.pretty {
        true => serde_json::to_writer_pretty(writer, report)?,
        false => serde_json::to_writer(writer, report)?,
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::qc::Mode;

    /// Three short reads at distinct quality tiers (Phred+33):
    /// 'I' = Q40, '?' = Q30, '5' = Q20.
    fn create_test_stats() -> QcStats {
        let mut stats = QcStats::new(Mode::Short);
        stats.update_raw(b"ATGCATGC", b"IIIIIIII");
        stats.update_raw(b"GCTAGCTA", b"????????");
        stats.update_raw(b"ATATATATAT", b"5555555555");
        stats
    }

    #[test]
    fn test_write_json_report() {
        let stats = create_test_stats();
        let mut output = Vec::new();
        write_json_report(&stats, &mut output).unwrap();
        assert!(!output.is_empty());
        let json_str = String::from_utf8(output).unwrap();
        assert!(json_str.contains("fastars_version"));
        assert!(json_str.contains("read1_before_filtering"));
    }

    #[test]
    fn test_json_report_structure() {
        let stats = create_test_stats();
        let report = JsonReport::from_qc_stats(&stats, "fastars -i test.fq".to_string());
        assert_eq!(report.summary.sequencing_type, "single end");
        assert_eq!(report.read1_before_filtering.total_reads, 3);
        assert_eq!(report.command, "fastars -i test.fq");
    }

    #[test]
    fn test_json_report_quality_curve() {
        let stats = create_test_stats();
        let report = JsonReport::from_qc_stats(&stats, String::new());
        assert!(!report.read1_before_filtering.quality_curve.is_empty());
    }

    #[test]
    fn test_json_report_content_curves() {
        let stats = create_test_stats();
        let report = JsonReport::from_qc_stats(&stats, String::new());
        let content = &report.read1_before_filtering.content_curves;
        assert!(!content.a.is_empty());
        assert!(!content.t.is_empty());
        assert!(!content.g.is_empty());
        assert!(!content.c.is_empty());
    }

    #[test]
    fn test_json_report_length_histogram() {
        let stats = create_test_stats();
        let report = JsonReport::from_qc_stats(&stats, String::new());
        let length = &report.read1_before_filtering.length_histogram;
        assert!(length.min > 0);
        assert!(length.max >= length.min);
    }

    #[test]
    fn test_json_report_duplication() {
        let stats = create_test_stats();
        let report = JsonReport::from_qc_stats(&stats, String::new());
        assert!(report.duplication.rate >= 0.0);
        assert!(report.duplication.rate <= 100.0);
    }

    /// Compact output must not contain pretty-printing indentation.
    #[test]
    fn test_json_config_compact() {
        let stats = create_test_stats();
        let config = JsonConfig::new().with_pretty(false);
        let mut output = Vec::new();
        write_json_report_with_config(&stats, &config, &mut output).unwrap();
        let json_str = String::from_utf8(output).unwrap();
        assert!(!json_str.contains("\n  "));
    }

    #[test]
    fn test_filtering_stats_report() {
        let mut filtering_stats = FilteringStats::new(Mode::Short);
        filtering_stats.before.update_raw(b"ATGC", b"IIII");
        filtering_stats.before.update_raw(b"GCTA", b"!!!!");
        filtering_stats.after.update_raw(b"ATGC", b"IIII");
        let mut output = Vec::new();
        write_filtering_json_report(&filtering_stats, "fastars -i test.fq", &mut output).unwrap();
        let json_str = String::from_utf8(output).unwrap();
        assert!(json_str.contains("filtering_result"));
        assert!(json_str.contains("passed_filter_reads"));
    }

    /// Round-trip: the serialized report must be valid JSON.
    #[test]
    fn test_json_report_serialization() {
        let stats = create_test_stats();
        let report = JsonReport::from_qc_stats(&stats, String::new());
        let json = serde_json::to_string(&report).unwrap();
        assert!(!json.is_empty());
        let _: serde_json::Value = serde_json::from_str(&json).unwrap();
    }

    #[test]
    fn test_before_after_summary() {
        let stats = create_test_stats();
        let summary = BeforeAfterSummary::from_stats(&stats);
        assert_eq!(summary.total_reads, 3);
        assert!(summary.gc_content >= 0.0 && summary.gc_content <= 1.0);
        assert!(summary.q20_rate >= 0.0 && summary.q20_rate <= 1.0);
        assert!(summary.q30_rate >= 0.0 && summary.q30_rate <= 1.0);
    }

    /// Empty stats must not panic and must produce zeroed totals.
    #[test]
    fn test_read_stats_from_empty() {
        let stats = QcStats::new(Mode::Short);
        let read_stats = ReadStats::from_qc_stats(&stats);
        assert_eq!(read_stats.total_reads, 0);
        assert_eq!(read_stats.total_bases, 0);
        assert!(read_stats.quality_curve.is_empty());
    }

    #[test]
    fn test_json_report_with_adapter_stats() {
        let stats = create_test_stats();
        let adapter_stats = AdapterStats {
            adapter_r1: Some("AGATCGGAAGAG".to_string()),
            adapter_r2: None,
            reads_with_adapter: 100,
            bases_trimmed: 500,
        };
        let report = JsonReport::from_qc_stats(&stats, String::new())
            .with_adapter_stats(adapter_stats);
        assert!(report.adapter_cutting.is_some());
        let adapter = report.adapter_cutting.unwrap();
        assert_eq!(adapter.reads_with_adapter, 100);
    }

    /// `with_read2_stats` must flip the report to paired-end.
    #[test]
    fn test_json_report_paired_end() {
        let stats = create_test_stats();
        let read2_before = ReadStats::from_qc_stats(&stats);
        let read2_after = ReadStats::from_qc_stats(&stats);
        let report = JsonReport::from_qc_stats(&stats, String::new())
            .with_read2_stats(read2_before, read2_after);
        assert_eq!(report.summary.sequencing_type, "paired end");
        assert!(report.read2_before_filtering.is_some());
        assert!(report.read2_after_filtering.is_some());
    }
}