1use cyanea_core::{CyaneaError, Result};
7
8#[derive(Debug, Clone)]
10#[allow(non_snake_case)]
11pub struct AssemblyStats {
12 pub n_contigs: usize,
14 pub total_length: usize,
16 pub largest_contig: usize,
18 pub smallest_contig: usize,
20 pub gc_content: f64,
22 pub n50: usize,
24 pub l50: usize,
26 pub n90: usize,
28 pub l90: usize,
30 #[allow(non_snake_case)]
32 pub auN: f64,
33}
34
35pub fn assembly_stats(contigs: &[&[u8]]) -> Result<AssemblyStats> {
41 if contigs.is_empty() {
42 return Err(CyaneaError::InvalidInput(
43 "at least one contig is required".into(),
44 ));
45 }
46
47 let mut lengths: Vec<usize> = contigs.iter().map(|c| c.len()).collect();
48 lengths.sort_unstable_by(|a, b| b.cmp(a)); let total_length: usize = lengths.iter().sum();
51 let largest_contig = lengths[0];
52 let smallest_contig = *lengths.last().unwrap();
53 let n_contigs = lengths.len();
54
55 let mut gc = 0usize;
57 let mut non_n = 0usize;
58 for contig in contigs {
59 for &b in *contig {
60 let upper = b.to_ascii_uppercase();
61 match upper {
62 b'G' | b'C' => {
63 gc += 1;
64 non_n += 1;
65 }
66 b'A' | b'T' => {
67 non_n += 1;
68 }
69 _ => {} }
71 }
72 }
73 let gc_content = if non_n > 0 {
74 gc as f64 / non_n as f64
75 } else {
76 0.0
77 };
78
79 let (n50, l50) = nx_from_sorted(&lengths, total_length, 0.5);
80 let (n90, l90) = nx_from_sorted(&lengths, total_length, 0.9);
81
82 #[allow(non_snake_case)]
84 let auN: f64 = lengths.iter().map(|&l| (l as f64).powi(2)).sum::<f64>() / total_length as f64;
85
86 Ok(AssemblyStats {
87 n_contigs,
88 total_length,
89 largest_contig,
90 smallest_contig,
91 gc_content,
92 n50,
93 l50,
94 n90,
95 l90,
96 auN,
97 })
98}
99
100pub fn nx_values(contigs: &[&[u8]], x: f64) -> Result<(usize, usize)> {
109 if contigs.is_empty() {
110 return Err(CyaneaError::InvalidInput(
111 "at least one contig is required".into(),
112 ));
113 }
114 if x <= 0.0 || x > 1.0 {
115 return Err(CyaneaError::InvalidInput(format!(
116 "x must be in (0, 1], got {}",
117 x
118 )));
119 }
120
121 let mut lengths: Vec<usize> = contigs.iter().map(|c| c.len()).collect();
122 lengths.sort_unstable_by(|a, b| b.cmp(a));
123 let total: usize = lengths.iter().sum();
124
125 Ok(nx_from_sorted(&lengths, total, x))
126}
127
128fn nx_from_sorted(lengths: &[usize], total: usize, x: f64) -> (usize, usize) {
130 let threshold = (total as f64 * x).ceil() as usize;
131 let mut cumulative = 0usize;
132 for (i, &len) in lengths.iter().enumerate() {
133 cumulative += len;
134 if cumulative >= threshold {
135 return (len, i + 1);
136 }
137 }
138 (*lengths.last().unwrap(), lengths.len())
140}
141
142#[cfg(test)]
143mod tests {
144 use super::*;
145
146 #[test]
147 fn assembly_stats_simple() {
148 let contigs: Vec<&[u8]> = vec![b"ACGTACGT", b"ACGT", b"AC"];
149 let stats = assembly_stats(&contigs).unwrap();
150 assert_eq!(stats.n_contigs, 3);
151 assert_eq!(stats.total_length, 14);
152 assert_eq!(stats.largest_contig, 8);
153 assert_eq!(stats.smallest_contig, 2);
154 }
155
156 #[test]
157 fn n50_known_values() {
158 let c1 = vec![b'A'; 10];
162 let c2 = vec![b'A'; 8];
163 let c3 = vec![b'A'; 6];
164 let c4 = vec![b'A'; 4];
165 let c5 = vec![b'A'; 2];
166 let contigs: Vec<&[u8]> = vec![&c1, &c2, &c3, &c4, &c5];
167 let (n50, l50) = nx_values(&contigs, 0.5).unwrap();
168 assert_eq!(n50, 8);
169 assert_eq!(l50, 2);
170 }
171
172 #[test]
173 fn gc_content_correct() {
174 let contigs: Vec<&[u8]> = vec![b"GGCC", b"AATT"];
175 let stats = assembly_stats(&contigs).unwrap();
176 assert!((stats.gc_content - 0.5).abs() < 1e-10);
177 }
178
179 #[test]
180 fn empty_contigs_error() {
181 let contigs: Vec<&[u8]> = vec![];
182 assert!(assembly_stats(&contigs).is_err());
183 }
184}