Skip to main content

report_builder/
plots.rs

1use plotly::box_plot::BoxMean;
2use plotly::common::{DashType, Line, Marker, Mode, Orientation};
3use plotly::{Plot, Histogram, Scatter, BoxPlot};
4use plotly::layout::{Axis, Layout, Legend};
5use itertools_num::linspace;
6
7/// Plot a histogram of the scores for the targets and decoys
8pub fn plot_score_histogram(scores: &Vec<f64>, labels: &Vec<i32>, title: &str, x_title: &str) -> Result<Plot, String> {
9    assert_eq!(scores.len(), labels.len(), "Scores and labels must have the same length");
10    assert!(labels.iter().all(|&l| l == 1 || l == -1), "Labels must be 1 for targets and -1 for decoys");
11
12    let mut scores_target = Vec::new();
13    let mut scores_decoy = Vec::new();
14
15    for (score, label) in scores.iter().zip(labels.iter()) {
16        if *label == 1 {
17            scores_target.push(*score);
18        } else {
19            scores_decoy.push(*score);
20        }
21    }
22
23    let trace_target = Histogram::new(scores_target).name("Target");
24    let trace_decoy = Histogram::new(scores_decoy).name("Decoy");
25
26    let layout = Layout::new()
27        .title(title)
28        .x_axis(plotly::layout::Axis::new().title(x_title))
29        .y_axis(plotly::layout::Axis::new().title("Density"));
30
31    let mut plot = Plot::new();
32    plot.add_trace(trace_target);
33    plot.add_trace(trace_decoy);
34    plot.set_layout(layout);
35
36    Ok(plot)
37}
38
39
40
41fn ecdf(data: &mut Vec<f64>) -> (Vec<f64>, Vec<f64>) {
42    data.sort_by(|a, b| a.partial_cmp(b).unwrap());
43    let n = data.len() as f64;
44    let y: Vec<f64> = (1..=data.len()).map(|i| i as f64 / n).collect();
45    (data.clone(), y)
46}
47
48fn interpolate_ecdf(x: &Vec<f64>, y: &Vec<f64>, x_seq: &Vec<f64>) -> Vec<f64> {
49    x_seq.iter().map(|&xi| {
50        let idx = x.iter().position(|&xv| xv >= xi).unwrap_or(x.len() - 1);
51        y[idx]
52    }).collect()
53}
54
55// fn estimate_pi0(decoy_scores: &Vec<f64>, lambda: f64) -> f64 {
56//     let n = decoy_scores.len() as f64;
57//     let count_above_lambda = decoy_scores.iter().filter(|&&s| s > lambda).count() as f64;
58//     count_above_lambda / ((1.0 - lambda) * n)
59// }
60
61/// Estimate the proportion of null hypotheses (π₀).
62fn estimate_pi0(labels: &Vec<i32>) -> f64 {
63    let count_decoys = labels.iter().filter(|&&l| l == -1).count() as f64;
64    let count_targets = labels.iter().filter(|&&l| l == 1).count() as f64;
65    count_decoys / count_targets
66}
67
68/// Generate a P-P plot as described in Debrie, E. et. al. (2023) Journal of Proteome Research.
69/// 
70/// # Arguments
71/// 
72/// * `top_targets` - The top scores for the targets
73/// * `top_decoys` - The top scores for the decoys
74/// * `title` - The title of the plot
75/// 
76/// # Returns
77/// 
78/// A Plot object containing the P-P plot
79pub fn plot_pp(scores: &Vec<f64>, labels: &Vec<i32>, title: &str) -> Result<Plot, String> {
80    assert_eq!(scores.len(), labels.len(), "Scores and labels must have the same length");
81    assert!(labels.iter().all(|&l| l == 1 || l == -1), "Labels must be 1 for targets and -1 for decoys");
82
83    let mut scores_target = Vec::new();
84    let mut scores_decoy = Vec::new();
85
86    for (score, label) in scores.iter().zip(labels.iter()) {
87        if *label == 1 {
88            scores_target.push(*score);
89        } else {
90            scores_decoy.push(*score);
91        }
92    }
93
94    let (x_target, y_target) = ecdf(&mut scores_target);
95    let (x_decoy, y_decoy) = ecdf(&mut scores_decoy);
96
97    let x_min = x_target.first().unwrap().min(*x_decoy.first().unwrap());
98    let x_max = x_target.last().unwrap().max(*x_decoy.last().unwrap());
99    let x_seq: Vec<f64> = linspace(x_min, x_max, 1000).collect();
100
101    let y_target_interp = interpolate_ecdf(&x_target, &y_target, &x_seq);
102    let y_decoy_interp = interpolate_ecdf(&x_decoy, &y_decoy, &x_seq);
103
104    // let pi0 = estimate_pi0(&scores_decoy, 0.5);
105    let pi0 = estimate_pi0(labels);
106    let pi0_line_y: Vec<f64> = y_decoy_interp.iter().map(|&x| pi0 * x).collect();
107
108    let mut plot = Plot::new();
109
110    let scatter = Scatter::new(y_decoy_interp.clone(), y_target_interp.clone())
111        .mode(Mode::Markers)
112        .name("Target vs Decoy ECDF");
113
114    let reference_line = Scatter::new(vec![0.0, 1.0], vec![0.0, 1.0])
115        .mode(Mode::Lines)
116        .name("y = x (Perfect match)")
117        .line(Line::new().color("red").dash(DashType::Dash));
118
119    let pi0_line = Scatter::new(y_decoy_interp.clone(), pi0_line_y)
120        .mode(Mode::Lines)
121        .name(format!("Estimated π₀ = {:.3}", pi0))
122        .line(Line::new().color("blue").dash(DashType::Dot));
123
124    plot.add_trace(scatter);
125    plot.add_trace(reference_line);
126    plot.add_trace(pi0_line);
127    plot.set_layout(
128        Layout::new()
129            .title(title)
130            .x_axis(plotly::layout::Axis::new().title("Decoy ECDF"))
131            .y_axis(plotly::layout::Axis::new().title("Target ECDF")),
132    );
133
134    Ok(plot)
135}
136
137/// Generate a box plot of the scores/intensities for each file
138/// 
139/// # Arguments
140/// 
141/// * `scores` - A vector of vectors where each inner vector contains the scores/intensities for a file
142/// * `filenames` - A vector of filenames corresponding to the scores
143/// * `title` - The title of the plot
144/// * `x_title` - The title of the x-axis
145/// * `y_title` - The title of the y-axis
146/// 
147/// # Returns
148/// 
149/// A Plot object containing the box plot
150pub fn plot_boxplot(scores: &Vec<Vec<f64>>, filenames: Vec<String>, title: &str, x_title: &str, y_title: &str) -> Result<Plot, String> {
151    assert_eq!(scores.len(), filenames.len(), "Scores and filenames must have the same length");
152
153    let mut plot = Plot::new();
154    for (i, s) in scores.iter().enumerate() {
155        let trace = BoxPlot::new_xy(
156            vec![filenames[i].clone(); s.len()],
157            s.to_vec()).name(filenames[i].clone()).box_mean(BoxMean::True);
158        plot.add_trace(trace);
159    }
160    
161    let layout = Layout::new()
162        .title(title)
163        .x_axis(Axis::new().title(x_title).tick_angle(45.0))
164        .y_axis(Axis::new().title(y_title))
165        .show_legend(false);
166    
167    plot.set_layout(layout);
168
169    Ok(plot)
170}
171
172
173pub fn plot_scatter(x: &Vec<Vec<f64>>, y: &Vec<Vec<f64>>, labels: Vec<String>, title: &str, x_title: &str, y_title: &str) -> Result<Plot, String> {
174    assert_eq!(x.len(), y.len(), "X and Y must have the same length");
175
176    // Check to see how large the data is, if there's a large amount of data we should use web_gl_mode. We can look at one of the arrays to see how many points there are
177    let web_gl_mode = x[0].len() > 10_000;
178
179    let mut plot = Plot::new();
180    for (i, (x_i, y_i)) in x.iter().zip(y.iter()).enumerate() {
181        let trace = Scatter::new(x_i.to_vec(), y_i.to_vec()).name(labels[i].clone()).mode(Mode::Markers).marker(Marker::new().size(10)).web_gl_mode(true);
182        plot.add_trace(trace);
183    }
184
185    let layout = Layout::new()
186        .title(title)
187        .x_axis(Axis::new().title(x_title))
188        .y_axis(Axis::new().title(y_title))
189        .legend(Legend::new().orientation(Orientation::Vertical));
190
191    plot.set_layout(layout);
192
193    Ok(plot)
194}
195
196#[cfg(test)]
197mod tests {
198    use super::*;
199
200    #[test]
201    fn test_plot_boxplot() {
202        let scores = vec![
203            vec![1.0, 2.0, 3.0, 4.0, 5.0],
204            vec![6.0, 7.0, 8.0, 9.0, 10.0],
205            vec![11.0, 12.0, 13.0, 14.0, 15.0],
206        ];
207        let filenames = vec![
208            "file1".to_string(),
209            "file2".to_string(),
210            "file3".to_string(),
211        ];
212        let title = "Box Plot";
213        let x_title = "Filenames";
214        let y_title = "Scores";
215
216        let plot = plot_boxplot(&scores, filenames, title, x_title, y_title).unwrap();
217
218        plot.write_html("test_plot_boxplot.html");
219
220        // assert_eq!(plot.
221        // assert_eq!(plot.layout.title, Some(title.to_string()));
222        // assert_eq!(plot.layout.x_axis, Some(Axis::new().title(x_title).tick_angle(45.0)));
223        // assert_eq!(plot.layout.y_axis, Some(Axis::new().title(y_title)));
224    }
225
226    #[test]
227    #[should_panic(expected = "Scores and filenames must have the same length")]
228    fn test_plot_boxplot_mismatched_lengths() {
229        let scores = vec![
230            vec![1.0, 2.0, 3.0, 4.0, 5.0],
231            vec![6.0, 7.0, 8.0, 9.0, 10.0],
232        ];
233        let filenames = vec![
234            "file1".to_string(),
235            "file2".to_string(),
236            "file3".to_string(),
237        ];
238        let title = "Box Plot";
239        let x_title = "Filenames";
240        let y_title = "Scores";
241
242        plot_boxplot(&scores, filenames, title, x_title, y_title).unwrap();
243    }
244
245    #[test]
246    fn test_plot_scatter() {
247        let x = vec![
248            vec![1.0, 2.0, 3.0, 4.0, 5.0],
249            vec![2.0, 7.0, 3.0, 9.0, 10.0],
250            vec![1.0, 12.0, 13.0, 14.0, 15.0],
251        ];
252        let y = vec![
253            vec![1.0, 2.0, 3.0, 4.0, 5.0],
254            vec![6.0, 7.0, 8.0, 9.0, 10.0],
255            vec![11.0, 12.0, 13.0, 14.0, 15.0],
256        ];
257        let labels = vec![
258            "file1".to_string(),
259            "file2".to_string(),
260            "file3".to_string(),
261        ];
262        let title = "Scatter Plot";
263        let x_title = "X";
264        let y_title = "Y";
265
266        let plot = plot_scatter(&x, &y, labels, title, x_title, y_title).unwrap();
267
268        plot.write_html("test_plot_scatter.html");
269    }
270
271}