struct_compression_analyzer/plot/
mod.rs

1//! Generates plots for analysis results.
2//!
3//! This module provides functions to create various plots based on the analysis
4//! results, using the `plotters` crate.
5
6use crate::comparison::{
7    compare_groups::GroupComparisonResult, split_comparison::SplitComparisonResult,
8};
9use crate::results::analysis_results::AnalysisResults;
10use core::{error::Error, ops::Range};
11use plotters::{prelude::*, style::full_palette::PURPLE};
12use std::{fs, path::Path};
13
14/// Generates all plots for the analysis results.
15///
16/// This function acts as a wrapper to generate multiple plots,
17/// including the split comparison plot.
18///
19/// # Arguments
20///
21/// * `results` - A slice of [`AnalysisResults`], one for each analyzed file.
22/// * `output_dir` - The directory where the plot files will be written.
23///
24/// # Returns
25///
26/// * `Result<(), Box<dyn std::error::Error>>` - Ok if successful, otherwise a boxed [`std::error::Error`].
27pub fn generate_plots(
28    results: &[AnalysisResults],
29    output_dir: &Path,
30) -> Result<(), Box<dyn std::error::Error>> {
31    if results.is_empty() {
32        return Ok(());
33    }
34
35    let split_compare_dir = output_dir.join("split_comparison_plots");
36    fs::create_dir_all(&split_compare_dir)?;
37
38    // Generate split comparison plot
39    for (x, comparison) in results[0].split_comparisons.iter().enumerate() {
40        let output_path = split_compare_dir.join(format!("{}.png", comparison.name));
41        generate_ratio_split_comparison_plot(results, x, &output_path, false, false)?;
42
43        let output_path = split_compare_dir.join(format!("{}_with_estimate.png", comparison.name));
44        generate_ratio_split_comparison_plot(results, x, &output_path, false, true)?;
45
46        let output_path =
47            split_compare_dir.join(format!("{}_with_entropy_by_lzmatches.png", comparison.name));
48        generate_ratio_split_comparison_plot(results, x, &output_path, true, false)?;
49    }
50
51    let custom_comparisons_dir = output_dir.join("custom_comparison_plots");
52    fs::create_dir_all(&custom_comparisons_dir)?;
53
54    // Generate custom comparison plot
55    // Note: Assumption all items have same number of comparisons and
56    for (x, comparison) in results[0].custom_comparisons.iter().enumerate() {
57        // Write data for individual groups.
58        for (y, group_name) in comparison.group_names.iter().enumerate() {
59            let output_path = custom_comparisons_dir.join(format!(
60                "{}_{}_{}.png",
61                comparison.name,
62                group_name.replace(' ', "_"),
63                y
64            ));
65            generate_ratio_custom_comparison_plot(results, x, y..y + 1, &output_path, false)?;
66
67            let output_path = custom_comparisons_dir.join(format!(
68                "{}_{}_{}_with_estimate.png",
69                comparison.name,
70                group_name.replace(' ', "_"),
71                y
72            ));
73            generate_ratio_custom_comparison_plot(results, x, y..y + 1, &output_path, true)?;
74        }
75
76        let output_path = custom_comparisons_dir.join(format!("{}.png", comparison.name));
77        generate_ratio_custom_comparison_plot(
78            results,
79            x,
80            0..comparison.group_names.len(),
81            &output_path,
82            false,
83        )?;
84
85        let output_path =
86            custom_comparisons_dir.join(format!("{}_with_estimate.png", comparison.name));
87        generate_ratio_custom_comparison_plot(
88            results,
89            x,
90            0..comparison.group_names.len(),
91            &output_path,
92            true,
93        )?;
94    }
95
96    // Add calls to other plot generation functions here in the future
97    Ok(())
98}
99
100/// Struct to hold data and styling for a single plot line.
101struct PlotData {
102    label: String,
103    line_color: RGBColor,
104    data_points: Vec<(f64, f64)>,
105}
106
107/// Generates a line plot for the various columns from a split comparison.
108///
109/// # Arguments
110///
111/// * `results` - A slice of [`AnalysisResults`], one for each analyzed file.
112/// * `comparison_index` - The index of the split comparison to plot in the `split_comparisons` array.
113/// * `output_path` - The path where the plot file will be written.
114/// * `include_entropy_by_lzmatches_column` - Includes column for (1 / lz_matches * entropy_ratio).
115/// * `include_estimate_column` - Includes column for (estimate_ratio).
116///
117/// # Returns
118///
119/// * `Result<(), Box<dyn std::error::Error>>` - Ok if successful, otherwise a boxed [`std::error::Error`].
120pub fn generate_ratio_split_comparison_plot(
121    results: &[AnalysisResults],
122    comparison_index: usize,
123    output_path: &Path,
124    include_entropy_by_lzmatches_column: bool,
125    include_estimate_column: bool,
126) -> Result<(), Box<dyn std::error::Error>> {
127    if results.is_empty() || results[0].split_comparisons.is_empty() {
128        return Ok(()); // No data to plot
129    }
130
131    let root = create_drawing_area(results, output_path)?;
132
133    // Create the chart.
134    let mut chart = create_ratio_chart(results.len(), &root)?;
135
136    // Add labels (file indices).
137    draw_ratio_grid(results.len(), &mut chart)?;
138
139    // Prepare plot data
140    let mut plots: Vec<PlotData> = Vec::new();
141
142    // Zstd Ratio Plot Data
143    let zstd_data_points = make_split_data_points(results, comparison_index, |comparison| {
144        let base_zstd = comparison.group1_metrics.zstd_size;
145        let compare_zstd = comparison.group2_metrics.zstd_size;
146        calc_ratio_f64(compare_zstd, base_zstd)
147    });
148
149    plots.push(PlotData {
150        label: "zstd_ratio".to_owned(),
151        line_color: BLACK,
152        data_points: zstd_data_points,
153    });
154
155    // LZ Ratio Plot Data (inverted)
156    let lz_data_points = make_split_data_points(results, comparison_index, |comparison| {
157        let base_lz = comparison.group1_metrics.lz_matches;
158        let compare_lz = comparison.group2_metrics.lz_matches;
159        1.0 / calc_ratio_f64(compare_lz, base_lz)
160    });
161
162    plots.push(PlotData {
163        label: "1 / lz_matches_ratio".to_owned(),
164        line_color: RED,
165        data_points: lz_data_points,
166    });
167
168    // Entropy Difference Plot Data
169    let lz_data_points = make_split_data_points(results, comparison_index, |comparison| {
170        1.0 / comparison.split_max_entropy_diff_ratio()
171    });
172
173    plots.push(PlotData {
174        label: "1 / entropy_ratio".to_owned(),
175        line_color: GREEN,
176        data_points: lz_data_points,
177    });
178
179    if include_entropy_by_lzmatches_column {
180        let data_points = make_split_data_points(results, comparison_index, |comparison| {
181            let base_lz = comparison.group1_metrics.lz_matches;
182            let compare_lz = comparison.group2_metrics.lz_matches;
183            let lz_matches_ratio = calc_ratio_f64(compare_lz, base_lz);
184            1.0 / (comparison.split_max_entropy_diff_ratio() * lz_matches_ratio)
185        });
186
187        plots.push(PlotData {
188            label: "1 / (entropy_ratio * lz_matches)".to_owned(),
189            line_color: BLUE,
190            data_points,
191        });
192    }
193
194    if include_estimate_column {
195        // LZ Ratio Plot Data (inverted)
196        let data_points = make_split_data_points(results, comparison_index, |comparison| {
197            let base_est = comparison.group1_metrics.estimated_size;
198            let compare_est = comparison.group2_metrics.estimated_size;
199            calc_ratio_f64(compare_est, base_est)
200        });
201
202        plots.push(PlotData {
203            label: "estimate_ratio".to_owned(),
204            line_color: PURPLE,
205            data_points,
206        });
207    }
208
209    // Draw plots
210    for plot in plots {
211        draw_plot(&mut chart, &plot)?;
212    }
213
214    add_series_labels(&mut chart)?;
215    root.present()?;
216    Ok(())
217}
218
219/// Generates the base colours that will be transformed by a gradient
220fn generate_base_colors(
221    num_colors: usize,
222) -> Result<Vec<(RGBColor, RGBColor)>, Box<dyn std::error::Error>> {
223    let mut colours = Vec::<(RGBColor, RGBColor)>::new();
224    if num_colors > 0 {
225        colours.push((RGBColor(0, 0, 0), RGBColor(150, 150, 150))); // Black to light grey
226    }
227    if num_colors > 1 {
228        colours.push((RGBColor(255, 0, 0), RGBColor(255, 150, 150))); // Red to light red
229    }
230    if num_colors > 2 {
231        colours.push((RGBColor(0, 255, 0), RGBColor(150, 255, 150))); // Green to light green
232    }
233    if num_colors > 3 {
234        colours.push((RGBColor(0, 0, 255), RGBColor(150, 150, 255))); // Blue to light blue
235    }
236    if num_colors > 4 {
237        return Err(Box::<dyn Error>::from(format!(
238            "Too many colours: {}",
239            num_colors
240        )));
241    }
242    Ok(colours)
243}
244
245/// Generates a sequence of distinct colors for plotting, with gradients.
246/// The colours are interleaved, R,G,B * num_gradients
247fn generate_color_palette(
248    base_colors: &[(RGBColor, RGBColor)],
249    num_gradients: usize,
250) -> Vec<RGBColor> {
251    let mut palette = Vec::new();
252
253    // (color channels)
254    for x in 0..num_gradients {
255        // Alternate, R,G,B
256        for (base_color, end_color) in base_colors {
257            let gradient_step = if num_gradients == 1 {
258                0.0
259            } else {
260                x as f32 / (num_gradients - 1) as f32
261            };
262
263            let r_step = (end_color.0 as f32 - base_color.0 as f32) * gradient_step;
264            let g_step = (end_color.1 as f32 - base_color.1 as f32) * gradient_step;
265            let b_step = (end_color.2 as f32 - base_color.2 as f32) * gradient_step;
266            let r = (base_color.0 as f32 + r_step) as u8;
267            let g = (base_color.1 as f32 + g_step) as u8;
268            let b = (base_color.2 as f32 + b_step) as u8;
269
270            palette.push(RGBColor(r, g, b));
271        }
272    }
273
274    palette
275}
276
277/// Generates a line plot for the various columns from a custom comparison.
278///
279/// # Arguments
280///
281/// * `results` - A slice of [`AnalysisResults`], one for each analyzed file.
282/// * `comparison_index` - The index of the custom comparison to plot in the `custom_comparisons` array.
283/// * `group_indices` - The range of indices for the groups to compare.
284/// * `output_path` - The path where the plot file will be written.
285/// * `include_estimate_column` - Whether to include the estimate ratio column.
286///
287/// # Returns
288///
289/// * `Result<(), Box<dyn std::error::Error>>` - Ok if successful, otherwise a boxed [`std::error::Error`].
290pub fn generate_ratio_custom_comparison_plot(
291    results: &[AnalysisResults],
292    comparison_index: usize,
293    group_indices: Range<usize>,
294    output_path: &Path,
295    include_estimate_column: bool,
296) -> Result<(), Box<dyn std::error::Error>> {
297    if results.is_empty() || results[0].split_comparisons.is_empty() {
298        return Ok(()); // No data to plot
299    }
300
301    let root = create_drawing_area(results, output_path)?;
302
303    // Create the chart.
304    let mut chart = create_ratio_chart(results.len(), &root)?;
305
306    // Add labels (file indices).
307    draw_ratio_grid(results.len(), &mut chart)?;
308
309    // Prepare plot data
310    let mut plots: Vec<PlotData> = Vec::new();
311    let group_names = &results[0].custom_comparisons[0].group_names;
312
313    // Get color palette
314    let num_gradients = group_indices.len();
315    let num_base_colors = 4;
316    let base_colors = generate_base_colors(num_base_colors)?;
317    let colors = generate_color_palette(&base_colors, num_gradients);
318
319    // Zstd Ratio Plot Data
320    let start_index = group_indices.start;
321    for group_idx in group_indices {
322        let group_name = &group_names[group_idx];
323        let group_offset = group_idx - start_index;
324        let color_offset = group_offset * num_base_colors;
325
326        let zstd_data_points = make_custom_data_points(results, comparison_index, |comparison| {
327            let base_zstd = comparison.baseline_metrics.zstd_size;
328            let compare_zstd = comparison.group_metrics[group_idx].zstd_size;
329            calc_ratio_f64(compare_zstd, base_zstd)
330        });
331
332        plots.push(PlotData {
333            label: format!("zstd_ratio ({})", group_name),
334            line_color: colors[color_offset],
335            data_points: zstd_data_points,
336        });
337
338        // LZ Ratio Plot Data (inverted)
339        let lz_data_points = make_custom_data_points(results, comparison_index, |comparison| {
340            let base_lz = comparison.baseline_metrics.lz_matches;
341            let compare_lz = comparison.group_metrics[group_idx].lz_matches;
342            1.0 / calc_ratio_f64(compare_lz, base_lz)
343        });
344
345        plots.push(PlotData {
346            label: format!("1 / lz_matches_ratio ({})", group_name),
347            line_color: colors[color_offset + 1],
348            data_points: lz_data_points,
349        });
350
351        // Entropy Ratio Plot Data
352        let entropy_data_points =
353            make_custom_data_points(results, comparison_index, |comparison| {
354                1.0 / (comparison.baseline_metrics.entropy
355                    / comparison.group_metrics[group_idx].entropy)
356            });
357
358        // Don't plot if the entropy ratio is 1.0.
359        // This is a 'rough' check to avoid plotting a straight line.
360        if entropy_data_points[0].1 != 1.0 {
361            plots.push(PlotData {
362                label: format!("entropy_ratio ({})", group_name),
363                line_color: colors[color_offset + 2],
364                data_points: entropy_data_points,
365            });
366        }
367
368        // Estimate Ratio Plot Data
369        if include_estimate_column {
370            let estimate_data_points =
371                make_custom_data_points(results, comparison_index, |comparison| {
372                    let base_zstd = comparison.baseline_metrics.estimated_size;
373                    let compare_zstd = comparison.group_metrics[group_idx].estimated_size;
374                    calc_ratio_f64(compare_zstd, base_zstd)
375                });
376
377            plots.push(PlotData {
378                label: format!("estimate_ratio ({})", group_name),
379                line_color: colors[color_offset + 3],
380                data_points: estimate_data_points,
381            });
382        }
383    }
384
385    // Draw plots
386    for plot in plots {
387        draw_plot(&mut chart, &plot)?;
388    }
389
390    add_series_labels(&mut chart)?;
391    root.present()?;
392    Ok(())
393}
394
395/// Calculates the data points for a plot.
396fn make_split_data_points<F>(
397    results: &[AnalysisResults],
398    comp_idx: usize,
399    value_calculator: F,
400) -> Vec<(f64, f64)>
401where
402    F: Fn(&SplitComparisonResult) -> f64,
403{
404    let mut data_points: Vec<(f64, f64)> = Vec::new();
405    for (file_idx, result) in results.iter().enumerate() {
406        let comparison_result = &result.split_comparisons[comp_idx];
407        let y_value = value_calculator(comparison_result);
408        data_points.push((file_idx as f64, y_value));
409    }
410    data_points
411}
412
413/// Calculates the data points for a plot.
414fn make_custom_data_points<F>(
415    results: &[AnalysisResults],
416    comp_idx: usize,
417    value_calculator: F,
418) -> Vec<(f64, f64)>
419where
420    F: Fn(&GroupComparisonResult) -> f64,
421{
422    let mut data_points: Vec<(f64, f64)> = Vec::new();
423    for (file_idx, result) in results.iter().enumerate() {
424        let comparison_result = &result.custom_comparisons[comp_idx];
425        let y_value = value_calculator(comparison_result);
426        data_points.push((file_idx as f64, y_value));
427    }
428    data_points
429}
430
431/// Draws a single plot line and its points.
432fn draw_plot<'a>(
433    chart: &mut ChartContext<
434        'a,
435        BitMapBackend<'a>,
436        Cartesian2d<plotters::coord::types::RangedCoordf64, plotters::coord::types::RangedCoordf64>,
437    >,
438    plot: &PlotData,
439) -> Result<(), Box<dyn std::error::Error>> {
440    let line_color = plot.line_color;
441    let line_style = ShapeStyle::from(line_color).stroke_width(5);
442    let coord_style = ShapeStyle::from(BLACK).filled();
443
444    let plot_points = plot.data_points.clone();
445    chart
446        .draw_series(LineSeries::new(plot_points, line_style))?
447        .label(&plot.label)
448        .legend(move |(x, y)| {
449            PathElement::new(
450                vec![(x, y), (x + 20, y)],
451                ShapeStyle::from(line_color).stroke_width(5),
452            )
453        });
454
455    chart.draw_series(PointSeries::<_, _, Circle<_, _>, _>::new(
456        plot.data_points.clone(),
457        7.5,
458        coord_style,
459    ))?;
460
461    Ok(())
462}
463
464fn create_drawing_area<'a>(
465    results: &[AnalysisResults],
466    output_file: &'a Path,
467) -> Result<DrawingArea<BitMapBackend<'a>, plotters::coord::Shift>, Box<dyn std::error::Error>> {
468    // Auto adjust size such that each value has constant amount of sapce.
469    let width = results.len() * 64;
470    let root = BitMapBackend::new(output_file, (width as u32, 1440)).into_drawing_area();
471    root.fill(&WHITE)?;
472    Ok(root)
473}
474
475/// Creates a chart for plotting compression ratio information,
476/// with a fixed range of 0.6 to 1.20 in terms of compression ratio.
477fn create_ratio_chart<'a>(
478    num_results: usize,
479    root: &DrawingArea<BitMapBackend<'a>, plotters::coord::Shift>,
480) -> Result<
481    ChartContext<
482        'a,
483        BitMapBackend<'a>,
484        Cartesian2d<plotters::coord::types::RangedCoordf64, plotters::coord::types::RangedCoordf64>,
485    >,
486    Box<dyn std::error::Error>,
487> {
488    let chart: ChartContext<
489        '_,
490        BitMapBackend<'a>,
491        Cartesian2d<plotters::coord::types::RangedCoordf64, plotters::coord::types::RangedCoordf64>,
492    > = ChartBuilder::on(root)
493        .margin(5)
494        .x_label_area_size(80)
495        .y_label_area_size(80)
496        .build_cartesian_2d(
497            0f64..num_results as f64, // x axis range, one point per file
498            0.60f64..1.20f64,         // y axis range, adjust as needed
499        )?;
500    Ok(chart)
501}
502
503/// Draws the grid, including the labels for a graph which presents a compression ratio
504/// centered around 1.0
505fn draw_ratio_grid<'a>(
506    results_len: usize,
507    chart: &mut ChartContext<
508        'a,
509        BitMapBackend<'a>,
510        Cartesian2d<plotters::coord::types::RangedCoordf64, plotters::coord::types::RangedCoordf64>,
511    >,
512) -> Result<(), Box<dyn std::error::Error>> {
513    chart
514        .configure_mesh()
515        // Title
516        .axis_desc_style(("sans-serif", 40).into_font())
517        // y labels
518        .y_label_style(("sans-serif", 40).into_font())
519        // x labels
520        .x_labels(results_len)
521        .x_label_style(("sans-serif", 40).into_font())
522        .x_label_formatter(&|x| format!("{}", x))
523        .draw()?;
524    Ok(())
525}
526
527/// Adds the series labels to the current chart.
528/// i.e. the little box which shows lines and their corresponding names.
529fn add_series_labels<'a>(
530    chart: &mut ChartContext<
531        'a,
532        BitMapBackend<'a>,
533        Cartesian2d<plotters::coord::types::RangedCoordf64, plotters::coord::types::RangedCoordf64>,
534    >,
535) -> Result<(), Box<dyn std::error::Error>> {
536    chart
537        .configure_series_labels()
538        .label_font(("sans-serif", 40))
539        .background_style(WHITE.mix(0.8))
540        .border_style(BLACK)
541        .position(SeriesLabelPosition::UpperLeft)
542        .draw()?;
543    Ok(())
544}
545
/// Divides `child` by `parent` as floating point numbers, treating a zero
/// denominator as a ratio of zero instead of producing infinity or NaN.
///
/// # Arguments
///
/// * `child` - The numerator. (comparison)
/// * `parent` - The denominator. (base)
///
/// # Returns
///
/// The ratio `child / parent` as an `f64`, or `0.0` if `parent` is zero.
pub(crate) fn calc_ratio_f64(child: u64, parent: u64) -> f64 {
    match parent {
        0 => 0.0,
        denominator => child as f64 / denominator as f64,
    }
}