crate::ix!();
pub async fn crate_activity_main(cli: &CrateActivityCli) -> Result<(), CrateActivityError> {
tracing_setup::configure_tracing();
let config_dir = configure_directory().await?;
let crate_names = read_crate_list(&config_dir).await;
let user_agent = read_user_agent(&config_dir).await;
let ignore_cache = *cli.ignore_cache();
let today = Utc::now().date_naive();
let one_day_ago = today - chrono::Duration::days(1);
let three_days_ago = today - chrono::Duration::days(3);
let seven_days_ago = today - chrono::Duration::days(7);
let activity_data = gather_crate_activity_data(
ignore_cache,
&crate_names,
&user_agent,
&config_dir,
one_day_ago,
three_days_ago,
seven_days_ago,
)
.await?;
let min_group_size = *cli.min_group_size();
let expand_groups = *cli.expand_groups();
let activity_summary = CrateActivitySummary::new(
activity_data.summaries(),
activity_data.interval_downloads_1d().clone(),
activity_data.interval_downloads_3d().clone(),
activity_data.interval_downloads_7d().clone(),
one_day_ago,
three_days_ago,
seven_days_ago,
expand_groups,
min_group_size
);
tracing::info!("{}", activity_summary);
let cleaned_summaries = if *cli.disable_outlier_handling() {
tracing::info!("Outlier detection disabled. Using raw data.");
activity_data.summaries().to_vec()
} else {
let z_threshold = *cli.outlier_z_threshold();
let downweight = *cli.downweight_outliers();
let weight = *cli.outlier_weight();
activity_data.summaries().iter().map(|s| {
let downloads: Vec<i64> = s.version_downloads().iter().map(|d| *d.downloads()).collect();
let outliers = detect_outliers_zscore(&downloads, z_threshold);
let outlier_count = outliers.iter().filter(|&&o| o).count();
if outlier_count > 0 {
if downweight {
tracing::info!(
"Crate '{}' had {} outliers (z-threshold={:.2}); downweighting by {:.2}",
s.crate_name(),
outlier_count,
z_threshold,
weight
);
} else {
tracing::info!(
"Crate '{}' had {} outliers (z-threshold={:.2}); removing them.",
s.crate_name(),
outlier_count,
z_threshold
);
}
} else {
tracing::info!(
"Crate '{}' had no outliers detected (z-threshold={:.2}).",
s.crate_name(),
z_threshold
);
}
let cleaned_version_downloads: Vec<VersionDownload> = if downweight {
let adjusted = downweight_outliers(&downloads, &outliers, weight);
s.version_downloads()
.iter()
.zip(adjusted.iter())
.map(|(vd, &val)| {
let adjusted_val = val.round() as i64;
VersionDownloadBuilder::default()
.version(*vd.version())
.downloads(adjusted_val)
.date(*vd.date())
.build()
.unwrap()
})
.collect()
} else {
s.version_downloads()
.iter()
.zip(outliers.iter())
.filter_map(|(vd, &is_outlier)| if !is_outlier { Some(vd.clone()) } else { None })
.collect()
};
let total: i64 = cleaned_version_downloads.iter().map(|d| d.downloads()).sum();
let count = cleaned_version_downloads.len().max(1) as f64;
let avg = total as f64 / count;
let peak = cleaned_version_downloads.iter().map(|d| d.downloads()).max().cloned().unwrap_or(0);
CrateUsageSummaryBuilder::default()
.crate_name(s.crate_name().to_string())
.total_downloads(total)
.average_daily_downloads(avg)
.peak_daily_downloads(peak)
.download_trend(s.download_trend().clone())
.version_downloads(cleaned_version_downloads)
.build()
.expect("Failed to build cleaned CrateUsageSummary")
}).collect()
};
// Compute correlations on (possibly cleaned) summaries
let correlations = compute_pairwise_correlations(&cleaned_summaries);
if *cli.show_correlations() {
display_correlations(&correlations);
}
if *cli.perform_pca() {
let crate_activity: HashMap<_, _> = cleaned_summaries.iter().map(|summary| {
(
summary.crate_name().clone(),
summary.version_downloads().iter().map(|d| *d.downloads()).collect(),
)
}).collect();
perform_pca(&crate_activity)?;
}
if *cli.perform_hierarchical_clustering() {
let dendrogram = perform_hierarchical_clustering(&correlations)?;
display_dendrogram(&dendrogram);
}
if *cli.correlation_network() {
let threshold = *cli.network_threshold();
let graph = build_correlation_graph(&correlations, threshold);
if *cli.print_summary() {
display_graph_summary(&graph);
}
if let Some(target) = cli.girvan_newman() {
let communities = girvan_newman_communities(graph.clone(), *target);
display_network_communities(&communities);
} else {
let communities = find_communities(&graph);
display_network_communities(&communities);
}
if *cli.compute_betweenness() {
let (node_bet, _edge_bet) = compute_betweenness_centrality(&graph);
display_top_betweenness_nodes(&node_bet, 10);
}
}
if *cli.time_lag_correlations() {
let max_lag = *cli.max_lag();
let lag_results = compute_time_lag_correlations(&cleaned_summaries, max_lag);
display_time_lag_correlations(&lag_results);
}
tracing::info!("Crate usage analysis completed.");
Ok(())
}
pub(crate) use reqwest::Client;
pub(crate) use futures::{future, StreamExt, TryStreamExt};
pub(crate) use serde::{Serialize, Deserialize};
pub(crate) use std::collections::{VecDeque, HashSet, HashMap};
pub(crate) use error_tree::*;
pub(crate) use tokio::{
fs::{self, File},
io::{self, AsyncBufReadExt, BufReader}
};
pub(crate) use std::path::{PathBuf, Path};
pub(crate) use chrono::{NaiveDate, DateTime, Utc};
pub(crate) use export_magic::*;
pub(crate) use getset::{Getters, Setters};
pub(crate) use derive_builder::Builder;
pub(crate) use std::fmt;
pub(crate) use structopt::StructOpt;
pub(crate) use itertools::Itertools;
pub(crate) use nalgebra::DMatrix;
pub(crate) use ndarray::{Array2, Axis};
pub(crate) use ndarray_stats::SummaryStatisticsExt;
pub(crate) use std::f64;
crate::ix!();
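/// Population-variance check: true when sum((x - mean)^2) / n exceeds 1e-5.
/// Empty input divides by zero, yielding NaN, which compares as false, so
/// empty data is reported as having no significant variance.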
pub fn has_significant_variance(data: &[i64]) -> bool {
let mean = data.iter().sum::<i64>() as f64 / data.len() as f64;
let variance = data.iter().map(|&x| (x as f64 - mean).powi(2)).sum::<f64>() / data.len() as f64;
variance > 1e-5 // Use a small threshold to filter out near-constant datasets
}
#[cfg(test)]
mod check_for_significant_variance_tests {
use super::*;
#[test]
fn test_empty_data() {
let data = [];
assert!(!has_significant_variance(&data), "Empty data should have no significant variance.");
}
#[test]
fn test_single_element() {
let data = [10];
assert!(!has_significant_variance(&data), "Single element should have no significant variance.");
}
#[test]
fn test_identical_elements() {
let data = [5, 5, 5, 5];
assert!(!has_significant_variance(&data), "Identical elements should have no significant variance.");
}
#[test]
fn test_high_variance() {
let data = [1, 10, 100, 1000];
assert!(has_significant_variance(&data), "Data with high variance should be detected.");
}
#[test]
fn test_low_variance() {
let data = [10, 10, 10, 11];
assert!(has_significant_variance(&data), "Data with low variance should be detected.");
}
#[test]
fn test_boundary_case() {
let data = [10, 10, 10, 10_000];
assert!(has_significant_variance(&data), "Data at the boundary of the threshold should be detected.");
}
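// A worked numeric check of the variance formula (an added sketch): for
// [2, 4, 4, 4, 5, 5, 7, 9] the mean is 5 and the population variance is
// exactly 32 / 8 = 4.0, comfortably above the 1e-5 threshold.
#[test]
fn test_known_variance_value() {
let data = [2, 4, 4, 4, 5, 5, 7, 9];
assert!(has_significant_variance(&data), "Variance of exactly 4.0 should be significant.");
}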
}
crate::ix!();
/// Fetch usage data for `crate_name` from the crates.io API, preferring a
/// same-day cached response unless `ignore_cache` is set.
pub async fn fetch_usage(ignore_cache: bool, user_agent: &str, config_dir: &Path, crate_name: &str)
-> Result<Option<CrateResponse>, reqwest::Error>
{
let today = Utc::now().date_naive();
if !ignore_cache {
if let Some(cached) = load_cached_response(config_dir, crate_name, today).await {
println!("Loaded cached data for {}", crate_name);
return Ok(Some(cached));
}
}
let url = format!("https://crates.io/api/v1/crates/{}/downloads", crate_name);
let client = Client::new();
let response = client
.get(&url)
.header("User-Agent", user_agent)
.send()
.await?;
if response.status().is_success() {
let body = response.text().await?;
if let Ok(usage) = serde_json::from_str::<CrateResponse>(&body) {
cache_response(config_dir, crate_name, today, &usage).await.ok();
return Ok(Some(usage));
}
}
Ok(None)
}
crate::ix!();
pub const STRONG_CORRELATION_MAGNITUDE: f64 = 0.7;
pub fn display_correlations(correlations: &[(String, String, f64)]) {
let mut correlation_map: HashMap<String, Vec<(String, f64)>> = HashMap::new();
// Organize correlations into a map for efficient grouping
for (crate_a, crate_b, correlation) in correlations {
if correlation.abs() >= STRONG_CORRELATION_MAGNITUDE {
correlation_map
.entry(crate_a.clone())
.or_default()
.push((crate_b.clone(), *correlation));
}
}
println!("----------------[crate-correlation-analysis]----------------");
// Sort and prepare the display output
for (crate_name, correlated_crates) in correlation_map.iter_mut() {
// Sort correlations by their absolute values in descending order
correlated_crates.sort_by(|a, b| b.1.abs().partial_cmp(&a.1.abs()).unwrap());
// Display the crate and its correlations
println!("{}", crate_name);
for (name, value) in correlated_crates {
println!(" {:>6.2} {}", value, name);
}
println!("");
}
}
crate::ix!();
use std::collections::HashMap;
/// Represents a hierarchical clustering dendrogram node.
#[derive(Debug)]
pub enum Dendrogram {
/// A leaf node representing a single crate.
Leaf {
/// Name of the crate represented by this leaf.
crate_name: String,
},
/// An internal node representing a merge of two clusters.
Internal {
/// Left child cluster.
left: Box<Dendrogram>,
/// Right child cluster.
right: Box<Dendrogram>,
/// The distance at which the two child clusters were merged.
distance: f64,
},
}
/// Errors that can occur during hierarchical clustering.
#[derive(Debug)]
pub enum HierarchicalClusteringError {
/// No crates provided for clustering.
NoCrates,
/// Inconsistent or incomplete data caused shape issues.
DataShapeError,
/// Insufficient correlation data.
IncompleteCorrelationData,
/// Other I/O or data-related issues.
IoError(std::io::Error),
}
/// Perform hierarchical clustering using single-linkage based on crate correlations.
///
/// # Arguments
///
/// * `correlations` - A vector of (crate_a, crate_b, correlation) tuples from `compute_pairwise_correlations`.
///
/// # Returns
///
/// A `Dendrogram` representing the hierarchical clustering structure.
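///
/// # Example (sketch)
///
/// ```ignore
/// // Two crates with correlation 0.8 merge once, at distance 1.0 - 0.8 = 0.2.
/// let corr = vec![("a".to_string(), "b".to_string(), 0.8)];
/// let dendrogram = perform_hierarchical_clustering(&corr)?;
/// ```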
pub fn perform_hierarchical_clustering(
correlations: &[(String, String, f64)]
) -> Result<Dendrogram, HierarchicalClusteringError> {
// Extract unique crate names from the correlation tuples.
let mut crate_set = HashMap::new();
for (a, b, _) in correlations {
crate_set.entry(a.clone()).or_insert(true);
crate_set.entry(b.clone()).or_insert(true);
}
let mut crate_names: Vec<String> = crate_set.keys().cloned().collect();
if crate_names.is_empty() {
// No crates at all means we cannot cluster.
return Err(HierarchicalClusteringError::NoCrates);
}
crate_names.sort(); // Ensure stable ordering of crates.
// Map crate names to indices
let index_map: HashMap<String, usize> = crate_names
.iter()
.enumerate()
.map(|(i, name)| (name.clone(), i))
.collect();
let n = crate_names.len();
// If there's only one crate, hierarchical clustering is trivial.
// Just return a single leaf node.
if n == 1 {
// Single crate scenario: just return a leaf, ignoring correlations.
return Ok(Dendrogram::Leaf {
crate_name: crate_names[0].clone(),
});
}
// Initialize a distance matrix.
// Default distance = 1.0 for missing pairs.
// Distance = 1 - correlation.
let mut distance_matrix = vec![1.0; n * n];
// Distance to self is zero.
for i in 0..n {
distance_matrix[i * n + i] = 0.0;
}
// Fill in distance matrix from correlations
// If not present, distance remains 1.0 (implying no correlation).
for (a, b, corr) in correlations {
if let (Some(&i), Some(&j)) = (index_map.get(a), index_map.get(b)) {
let dist = 1.0 - corr;
let idx1 = i * n + j;
let idx2 = j * n + i;
if idx1 < distance_matrix.len() && idx2 < distance_matrix.len() {
distance_matrix[idx1] = dist;
distance_matrix[idx2] = dist;
} else {
return Err(HierarchicalClusteringError::DataShapeError);
}
}
}
#[derive(Clone)]
struct Cluster {
indices: Vec<usize>,
}
// Each crate starts as its own cluster
let mut clusters: Vec<Cluster> = (0..n).map(|i| Cluster { indices: vec![i] }).collect();
let mut active = vec![true; n]; // which cluster IDs are active
// Each leaf node initially points to a leaf dendrogram
let mut dendrogram_nodes: Vec<Option<Dendrogram>> = crate_names
.iter()
.map(|name| Some(Dendrogram::Leaf { crate_name: name.clone() }))
.collect();
// Perform (n-1) merges
for _step in 0..(n-1) {
// Find the two closest distinct active clusters
let mut min_dist = f64::MAX;
let mut closest_pair = (0, 0);
for i in 0..n {
if !active[i] { continue; }
for j in (i+1)..n {
if !active[j] { continue; }
let dist = cluster_distance(&clusters[i].indices, &clusters[j].indices, &distance_matrix, n)?;
if dist < min_dist {
min_dist = dist;
closest_pair = (i, j);
}
}
}
let (c1, c2) = closest_pair;
let mut new_indices = Vec::new();
new_indices.extend_from_slice(&clusters[c1].indices);
new_indices.extend_from_slice(&clusters[c2].indices);
// Create a new dendrogram node from merging c1 and c2
let left_node = dendrogram_nodes[c1].take().ok_or(HierarchicalClusteringError::DataShapeError)?;
let right_node = dendrogram_nodes[c2].take().ok_or(HierarchicalClusteringError::DataShapeError)?;
let new_node = Dendrogram::Internal {
left: Box::new(left_node),
right: Box::new(right_node),
distance: min_dist,
};
// Merge c2 into c1 and deactivate c2
clusters[c1] = Cluster { indices: new_indices };
dendrogram_nodes[c1] = Some(new_node);
active[c2] = false;
}
// The final active cluster is our root
let final_node = dendrogram_nodes
.into_iter()
.enumerate()
.filter(|(i, _)| active[*i])
.map(|(_, node)| node)
.find(|n| n.is_some())
.ok_or(HierarchicalClusteringError::DataShapeError)?;
final_node.ok_or(HierarchicalClusteringError::DataShapeError)
}
/// Compute single-linkage distance between two clusters.
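/// The single-linkage distance is the minimum pairwise distance between any
/// member of `c1` and any member of `c2`.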
fn cluster_distance(
c1: &impl AsRef<[usize]>,
c2: &impl AsRef<[usize]>,
distance_matrix: &[f64],
n: usize,
) -> Result<f64, HierarchicalClusteringError> {
let mut min_dist = f64::MAX;
for &i in c1.as_ref() {
for &j in c2.as_ref() {
let idx = i*n + j;
if idx >= distance_matrix.len() {
return Err(HierarchicalClusteringError::DataShapeError);
}
let d = distance_matrix[idx];
if d < min_dist {
min_dist = d;
}
}
}
Ok(min_dist)
}
#[cfg(test)]
mod hierarchical_clustering_tests {
use super::*;
fn correlation_tuple(a: &str, b: &str, corr: f64) -> (String, String, f64) {
(a.to_string(), b.to_string(), corr)
}
#[test]
fn test_no_crates() {
let correlations: Vec<(String, String, f64)> = Vec::new();
let result = perform_hierarchical_clustering(&correlations);
match result {
Err(HierarchicalClusteringError::NoCrates) => (),
_ => panic!("Expected NoCrates error for empty input."),
}
}
#[test]
fn test_single_crate() {
// Crate names are extracted from correlation tuples, so a lone crate can
// only be expressed through an artificial self-pair. Clustering should
// short-circuit and return a single leaf.
let single_crate_corr = vec![
correlation_tuple("only_crate", "only_crate", 1.0)
];
let result = perform_hierarchical_clustering(&single_crate_corr);
if let Ok(dendrogram) = result {
match dendrogram {
Dendrogram::Leaf { crate_name } => {
assert_eq!(crate_name, "only_crate");
},
_ => panic!("Expected a leaf for a single crate."),
}
} else {
panic!("Expected success for single crate scenario.");
}
}
#[test]
fn test_two_crates_no_correlation() {
// Two distinct crates with zero correlation (distance=1.0)
let correlations = vec![correlation_tuple("crateA", "crateB", 0.0)];
let result = perform_hierarchical_clustering(&correlations);
if let Ok(dendrogram) = result {
// Expect a single internal node with two leaves
match dendrogram {
Dendrogram::Internal { left, right, distance } => {
// Distance should be 1 - 0 = 1
assert!((distance - 1.0).abs() < 1e-9);
match (*left, *right) {
(Dendrogram::Leaf { crate_name: ref c1 }, Dendrogram::Leaf { crate_name: ref c2 }) => {
let mut crates = vec![c1.as_str(), c2.as_str()];
crates.sort();
assert_eq!(crates, vec!["crateA", "crateB"]);
},
_ => panic!("Expected two leaf nodes."),
}
},
_ => panic!("Expected an Internal node for two crates."),
}
} else {
panic!("Expected success for two crates no correlation.");
}
}
#[test]
fn test_perfect_correlation() {
// Two identical crates with correlation=1.0
let correlations = vec![correlation_tuple("crateA", "crateB", 1.0)];
let result = perform_hierarchical_clustering(&correlations);
if let Ok(dendrogram) = result {
match dendrogram {
Dendrogram::Internal { distance, .. } => {
// distance = 1 - corr = 0.0 since corr=1.0
assert!((distance - 0.0).abs() < 1e-9);
},
_ => panic!("Expected Internal node for two crates."),
}
} else {
panic!("Expected success for perfect correlation.");
}
}
#[test]
fn test_three_crates_mixed_correlations() {
// crateA and crateB correlate 0.8 -> distance=0.2
// crateB and crateC correlate 0.3 -> distance=0.7
// crateA and crateC no entry => distance=1.0 by default
let correlations = vec![
correlation_tuple("crateA", "crateB", 0.8),
correlation_tuple("crateB", "crateC", 0.3),
];
let result = perform_hierarchical_clustering(&correlations);
if let Ok(dendrogram) = result {
// We expect that the first merge will be between crateA and crateB (closest pair),
// then that cluster merges with crateC.
// The first merge distance: 1 - 0.8 = 0.2 (A-B)
// Then merging (A,B) cluster with C: min distance to C is via crateB (distance=0.7).
match dendrogram {
Dendrogram::Internal { left, right, distance: top_dist } => {
// The top-level merge should be at distance=0.7
assert!((top_dist - 0.7).abs() < 1e-9);
// One side should be crateC leaf, the other side the A-B cluster
let mut leaves = Vec::new();
fn collect_leaves(d: &Dendrogram, leaves: &mut Vec<String>) {
match d {
Dendrogram::Leaf { crate_name } => leaves.push(crate_name.clone()),
Dendrogram::Internal { left, right, .. } => {
collect_leaves(left, leaves);
collect_leaves(right, leaves);
}
}
}
collect_leaves(&*left, &mut leaves);
collect_leaves(&*right, &mut leaves);
leaves.sort();
assert_eq!(leaves, vec!["crateA", "crateB", "crateC"]);
},
_ => panic!("Expected internal node at top."),
}
} else {
panic!("Expected success for three crates mixed correlations.");
}
}
#[test]
fn test_incomplete_correlation_data() {
// Three crates where the Y-Z pair is missing entirely. Missing pairs
// default to distance 1.0, so clustering should still succeed rather
// than error, just with larger distances.
let correlations = vec![
correlation_tuple("crateX", "crateY", 0.5), // X-Y distance = 0.5
correlation_tuple("crateX", "crateZ", 0.0), // X-Z distance = 1.0; Y-Z missing => 1.0
];
let result = perform_hierarchical_clustering(&correlations);
if let Ok(dendrogram) = result {
// Just ensure it doesn't fail. Check we have three leaves total.
let mut leaves = Vec::new();
fn collect_leaves(d: &Dendrogram, leaves: &mut Vec<String>) {
match d {
Dendrogram::Leaf { crate_name } => leaves.push(crate_name.clone()),
Dendrogram::Internal { left, right, .. } => {
collect_leaves(left, leaves);
collect_leaves(right, leaves);
}
}
}
collect_leaves(&dendrogram, &mut leaves);
leaves.sort();
assert_eq!(leaves, vec!["crateX", "crateY", "crateZ"]);
} else {
panic!("Expected success even with incomplete data (missing pairs).");
}
}
#[test]
fn test_many_crates_low_correlation() {
// Several crates, all with zero correlation => large distances.
// Just test performance & correctness, ensure no panic.
let crates = &["a", "b", "c", "d", "e"];
let mut correlations = Vec::new();
// minimal set of correlations with zero correlation
correlations.push(correlation_tuple("a", "b", 0.0));
correlations.push(correlation_tuple("b", "c", 0.0));
correlations.push(correlation_tuple("c", "d", 0.0));
correlations.push(correlation_tuple("d", "e", 0.0));
// Missing pairs means distance=1.0 anyway.
let result = perform_hierarchical_clustering(&correlations);
if let Ok(dendrogram) = result {
// Collect leaves
let mut leaves = Vec::new();
fn collect_leaves(d: &Dendrogram, leaves: &mut Vec<String>) {
match d {
Dendrogram::Leaf { crate_name } => leaves.push(crate_name.clone()),
Dendrogram::Internal { left, right, .. } => {
collect_leaves(left, leaves);
collect_leaves(right, leaves);
}
}
}
collect_leaves(&dendrogram, &mut leaves);
leaves.sort();
assert_eq!(leaves, vec!["a", "b", "c", "d", "e"]);
} else {
panic!("Expected success with many crates low correlation.");
}
}
// Additional tests could force DataShapeError by corrupting internal state,
// but that requires control over internals the public API does not expose.
// Errors here mainly arise from empty datasets or indexing issues, and the
// empty (NoCrates) scenario is covered above.
}
crate::ix!();
/// Cache the API response under `<config_dir>/cache/<crate>_<date>.json`.
pub async fn cache_response(config_dir: &Path, crate_name: &str, date: NaiveDate, response: &CrateResponse)
-> io::Result<()>
{
let cache_dir = config_dir.join("cache");
fs::create_dir_all(&cache_dir).await?;
let cache_file = cache_dir.join(format!("{}_{}.json", crate_name, date));
let json = serde_json::to_string(response)?; // Serialize to string
fs::write(cache_file, json).await // Write as bytes
}
/// Load a cached response for `crate_name` and `date`, if present and parseable.
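///
/// A round-trip sketch (assumes a Tokio runtime, a writable `config_dir`,
/// and some previously fetched `response`):
///
/// ```ignore
/// cache_response(config_dir, "serde", today, &response).await?;
/// assert!(load_cached_response(config_dir, "serde", today).await.is_some());
/// ```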
pub async fn load_cached_response(config_dir: &Path, crate_name: &str, date: NaiveDate) -> Option<CrateResponse> {
let cache_file = config_dir.join("cache").join(format!("{}_{}.json", crate_name, date));
if cache_file.exists() {
if let Ok(json) = fs::read_to_string(&cache_file).await {
if let Ok(response) = serde_json::from_str::<CrateResponse>(&json) {
return Some(response);
}
}
}
None
}
#![allow(unused_imports)]
#![allow(unused_variables)]
#![allow(dead_code)]
#[macro_use] mod imports; use imports::*;
x!{analyze}
x!{errors}
x!{fetch_usage}
x!{read_cache}
x!{read_config}
x!{summary}
x!{usage}
x!{correlation}
x!{variance}
x!{align_and_normalize}
x!{pearson}
x!{intersect_date_ranges}
x!{version_download}
x!{crates_io_response}
x!{display_correlations}
x!{cli}
x!{crate_activity_data}
x!{pca}
x!{activity}
x!{workspace}
x!{hierarchical_clustering}
x!{display_dendrogram}
x!{correlation_graph}
x!{time_lag_correlation}
x!{outlier_detection}
crate::ix!();
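/// Pearson correlation via the single-pass computational formula:
/// r = (Σxy − Σx·Σy/n) / sqrt((Σx² − (Σx)²/n) · (Σy² − (Σy)²/n)).
/// Mismatched lengths, empty input, or a zero denominator (e.g. a constant
/// series) all return 0.0.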
pub fn pearson_correlation(x: &[i64], y: &[i64]) -> f64 {
if x.len() != y.len() || x.is_empty() {
return 0.0; // Handle mismatched or empty data
}
let n = x.len() as f64;
// Convert to f64 early to prevent overflow or precision loss
let sum_x: f64 = x.iter().map(|&xi| xi as f64).sum();
let sum_y: f64 = y.iter().map(|&yi| yi as f64).sum();
let sum_x_squared: f64 = x.iter().map(|&xi| (xi as f64).powi(2)).sum();
let sum_y_squared: f64 = y.iter().map(|&yi| (yi as f64).powi(2)).sum();
let sum_xy: f64 = x.iter().zip(y).map(|(&xi, &yi)| (xi as f64) * (yi as f64)).sum();
let numerator = sum_xy - ((sum_x * sum_y) / n);
let denominator = ((sum_x_squared - (sum_x.powi(2) / n)) * (sum_y_squared - (sum_y.powi(2) / n))).sqrt();
if denominator == 0.0 {
0.0 // No correlation if denominator is zero
} else {
numerator / denominator
}
}
#[cfg(test)]
mod pearson_correlation_tests {
use super::*;
#[test]
fn test_empty_inputs() {
let x: Vec<i64> = vec![];
let y: Vec<i64> = vec![];
let result = pearson_correlation(&x, &y);
assert_eq!(result, 0.0, "Empty inputs should return 0.0.");
}
#[test]
fn test_mismatched_lengths() {
let x = vec![1, 2, 3];
let y = vec![1, 2];
let result = pearson_correlation(&x, &y);
assert_eq!(result, 0.0, "Mismatched lengths should return 0.0.");
}
#[test]
fn test_all_zeros() {
let x = vec![0, 0, 0];
let y = vec![0, 0, 0];
let result = pearson_correlation(&x, &y);
assert_eq!(result, 0.0, "All zeros should return 0.0.");
}
#[test]
fn test_perfect_positive_correlation() {
let x = vec![1, 2, 3];
let y = vec![2, 4, 6];
let result = pearson_correlation(&x, &y);
assert!((result - 1.0).abs() < 1e-9, "Perfect positive correlation should return 1.0.");
}
#[test]
fn test_perfect_negative_correlation() {
let x = vec![1, 2, 3];
let y = vec![6, 4, 2];
let result = pearson_correlation(&x, &y);
assert!((result + 1.0).abs() < 1e-9, "Perfect negative correlation should return -1.0.");
}
#[test]
fn test_no_correlation() {
let x = vec![1, 2, 3];
let y = vec![2, 2, 2];
let result = pearson_correlation(&x, &y);
assert_eq!(result, 0.0, "No correlation should return 0.0.");
}
#[test]
fn test_single_element() {
let x = vec![1];
let y = vec![2];
let result = pearson_correlation(&x, &y);
assert_eq!(result, 0.0, "Single-element inputs should return 0.0.");
}
#[test]
fn test_high_variance_with_noise() {
let x = vec![1, 2, 3, 4, 5];
let y = vec![10, 9, 8, 7, 6]; // Negative correlation with noise
let result = pearson_correlation(&x, &y);
assert!(result < 0.0, "Should return a negative correlation for this dataset.");
}
#[test]
fn test_large_inputs() {
let size = 1000; // A large dataset
let x: Vec<i64> = (1..=size).collect();
let y: Vec<i64> = (1..=size).collect();
let result = pearson_correlation(&x, &y);
assert!((result - 1.0).abs() < 1e-9, "Large identical ranges should have perfect correlation.");
}
#[test]
fn test_mixed_positive_and_negative_values() {
let x = vec![1, 2, 3, 4, 5];
let y = vec![1, -2, 3, -4, 5];
let result = pearson_correlation(&x, &y);
assert!(result.abs() < 0.5, "Mixed positive and negative values should have low correlation.");
}
#[test]
fn test_uniformly_spaced_values() {
let x = vec![1, 3, 5, 7, 9];
let y = vec![2, 4, 6, 8, 10];
let result = pearson_correlation(&x, &y);
assert!((result - 1.0).abs() < 1e-9, "Uniformly spaced values should have perfect positive correlation.");
}
#[test]
fn test_precision_sensitivity() {
let x = vec![1_000_000, 1_000_001, 1_000_002];
let y = vec![2_000_000, 2_000_001, 2_000_002];
let result = pearson_correlation(&x, &y);
assert!((result - 1.0).abs() < 1e-9, "Close values should still yield perfect positive correlation.");
}
#[test]
fn test_randomized_datasets() {
use rand::Rng;
let mut rng = rand::thread_rng();
let x: Vec<i64> = (0..100).map(|_| rng.gen_range(0..1000)).collect();
let y: Vec<i64> = x.iter().map(|&xi| xi + rng.gen_range(0..10)).collect();
let result = pearson_correlation(&x, &y);
assert!(result > 0.9, "Randomized correlated datasets should have high positive correlation.");
}
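// A hand-computed check of the formula (an added sketch): for x = [1, 2, 3]
// and y = [1, 2, 4], Σxy = 17, so the numerator is 17 - 6*7/3 = 3 and the
// denominator is sqrt(2 * 14/3), giving r = sqrt(27/28) ≈ 0.981981.
#[test]
fn test_hand_computed_value() {
let x = vec![1, 2, 3];
let y = vec![1, 2, 4];
let result = pearson_correlation(&x, &y);
assert!((result - (27.0f64 / 28.0).sqrt()).abs() < 1e-9, "Expected r = sqrt(27/28).");
}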
}
crate::ix!();
/// Build a graph of crates where edges represent correlations above or equal to a given threshold.
///
/// Returns a HashMap: crate_name -> HashMap<adj_crate_name, correlation>
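///
/// # Example (sketch)
///
/// ```ignore
/// let corr = vec![("a".to_string(), "b".to_string(), 0.9)];
/// let graph = build_correlation_graph(&corr, 0.7);
/// assert!(graph["a"].contains_key("b") && graph["b"].contains_key("a"));
/// ```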
pub fn build_correlation_graph(
correlations: &[(String, String, f64)],
threshold: f64,
) -> HashMap<String, HashMap<String, f64>> {
let mut graph: HashMap<String, HashMap<String, f64>> = HashMap::new();
for (crate_a, crate_b, corr) in correlations {
if *corr >= threshold {
graph.entry(crate_a.clone()).or_default().insert(crate_b.clone(), *corr);
graph.entry(crate_b.clone()).or_default().insert(crate_a.clone(), *corr);
}
}
graph
}
/// Find communities in the graph by extracting connected components.
/// Each community is a Vec of crate names.
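/// Components are discovered with an iterative depth-first search; each
/// community is sorted lexicographically and the list of communities is
/// sorted by size, smallest first.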
pub fn find_communities(graph: &HashMap<String, HashMap<String, f64>>) -> Vec<Vec<String>> {
let mut visited = HashSet::new();
let mut communities = Vec::new();
for node in graph.keys() {
if !visited.contains(node) {
// Iterative DFS (explicit stack) to collect the connected component.
let mut stack = vec![node.clone()];
let mut component = Vec::new();
while let Some(current) = stack.pop() {
if visited.insert(current.clone()) {
component.push(current.clone());
if let Some(neighbors) = graph.get(&current) {
for neighbor in neighbors.keys() {
if !visited.contains(neighbor) {
stack.push(neighbor.clone());
}
}
}
}
}
component.sort();
communities.push(component);
}
}
communities.sort_by_key(|c| c.len());
communities
}
/// Compute degree centrality: number of edges per node.
pub fn compute_degree_centrality(
graph: &HashMap<String, HashMap<String, f64>>
) -> HashMap<String, usize> {
let mut centralities = HashMap::new();
for (node, neighbors) in graph {
centralities.insert(node.clone(), neighbors.len());
}
centralities
}
/// Display the communities (connected components) found in the correlation network.
pub fn display_network_communities(communities: &[Vec<String>]) {
println!("----------------[correlation-network-communities]----------------");
for (i, community) in communities.iter().enumerate() {
println!("Community {} (size={}):", i + 1, community.len());
for crate_name in community {
println!(" - {}", crate_name);
}
println!("");
}
}
/// Display the top N nodes by degree centrality.
pub fn display_top_central_nodes(centralities: &HashMap<String, usize>, top_n: usize) {
println!("----------------[top-central-nodes]----------------");
let mut centrals: Vec<_> = centralities.iter().collect();
centrals.sort_by(|a, b| b.1.cmp(a.1));
for (i, (crate_name, degree)) in centrals.iter().take(top_n).enumerate() {
println!("{}. {} (degree={})", i + 1, crate_name, degree);
}
}
/// Compute node and edge betweenness centrality using a standard approach:
/// For each node, run a shortest path search and count the shortest paths going through each other node and edge.
/// This is Brandes' algorithm for betweenness centrality.
///
/// Returns (node_betweenness, edge_betweenness) as HashMaps.
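///
/// Note: edge betweenness is halved at the end to correct for undirected
/// double counting, while node betweenness is returned unhalved (each
/// unordered pair contributes from both endpoints). In a path a-b-c, for
/// instance, only b lies on a shortest path between the other two nodes, so
/// b receives all of the node betweenness.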
pub fn compute_betweenness_centrality(
graph: &HashMap<String, HashMap<String, f64>>
) -> (HashMap<String, f64>, HashMap<(String, String), f64>) {
let mut node_bet = HashMap::new();
let mut edge_bet = HashMap::new();
for node in graph.keys() {
node_bet.insert(node.clone(), 0.0);
}
// Initialize edge betweenness for all edges
for (u, neighbors) in graph {
for v in neighbors.keys() {
let edge = ordered_edge(u, v);
edge_bet.entry(edge).or_insert(0.0);
}
}
// Brandes' algorithm: For each source node
for s in graph.keys() {
let (mut stack, mut pred, mut sigma, mut dist) = brandes_initialize(graph, s);
// BFS for shortest paths: all edges are treated as unweighted (weight = 1).
let mut queue = VecDeque::new();
dist.insert(s.clone(), 0.0);
sigma.insert(s.clone(), 1.0);
queue.push_back(s.clone());
while let Some(v) = queue.pop_front() {
stack.push(v.clone());
if let Some(neighbors) = graph.get(&v) {
for w in neighbors.keys() {
// Check using infinity to see if w is unvisited
if dist[w.as_str()] == f64::INFINITY {
dist.insert(w.clone(), dist[&v] + 1.0);
queue.push_back(w.clone());
}
// If w is exactly one step further than v, update sigma and pred
if (dist[w.as_str()] - dist[v.as_str()] - 1.0).abs() < 1e-9 {
sigma.insert(w.clone(), sigma[w] + sigma[&v]);
pred.get_mut(w).unwrap().push(v.clone());
}
}
}
}
// Accumulation
let mut delta: HashMap<String, f64> = HashMap::new();
for v in graph.keys() {
delta.insert(v.clone(), 0.0);
}
while let Some(w) = stack.pop() {
if let Some(pws) = pred.get(&w) {
let coeff = (1.0 + delta[&w]) / sigma[&w];
for v in pws {
let increment = sigma[v] * coeff;
delta.insert(v.clone(), delta[v] + increment);
// Edge betweenness
let edge = ordered_edge(v, &w);
*edge_bet.get_mut(&edge).unwrap() += increment;
}
}
if w != *s {
*node_bet.get_mut(&w).unwrap() += delta[&w];
}
}
}
// Normalize edge betweenness
for val in edge_bet.values_mut() {
*val /= 2.0;
}
(node_bet, edge_bet)
}
fn ordered_edge(a: &str, b: &str) -> (String, String) {
if a < b {
(a.to_string(), b.to_string())
} else {
(b.to_string(), a.to_string())
}
}
fn brandes_initialize(
graph: &HashMap<String, HashMap<String, f64>>,
s: &str
) -> (Vec<String>, HashMap<String, Vec<String>>, HashMap<String, f64>, HashMap<String, f64>) {
let stack = Vec::new();
let mut pred: HashMap<String, Vec<String>> = HashMap::new();
let mut sigma: HashMap<String, f64> = HashMap::new();
let mut dist: HashMap<String, f64> = HashMap::new();
for v in graph.keys() {
pred.insert(v.clone(), Vec::new());
sigma.insert(v.clone(), 0.0);
dist.insert(v.clone(), f64::INFINITY);
}
(stack, pred, sigma, dist)
}
/// Apply a simplified Girvan–Newman algorithm:
/// 1. Compute edge betweenness.
/// 2. Remove the edge with the highest betweenness.
/// 3. Recompute communities and repeat until the desired number of
///    communities is reached or no edges remain.
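///
/// # Example (sketch)
///
/// ```ignore
/// // Two tight pairs joined by a single bridge; requesting two communities
/// // removes the bridge edge first, since it carries the most shortest paths.
/// let graph = build_correlation_graph(&correlations, 0.7);
/// let communities = girvan_newman_communities(graph, 2);
/// ```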
pub fn girvan_newman_communities(
mut graph: HashMap<String, HashMap<String, f64>>,
target_communities: usize
) -> Vec<Vec<String>> {
loop {
let communities = find_communities(&graph);
if communities.len() >= target_communities {
return communities;
}
// Compute edge betweenness
let (_node_bet, edge_bet) = compute_betweenness_centrality(&graph);
// After computing edge betweenness:
let mut edges: Vec<_> = edge_bet.iter().collect();
// Sort primarily by descending betweenness, secondary by lex order of nodes
edges.sort_by(|((a1,b1), v1), ((a2,b2), v2)| {
v2.partial_cmp(v1).unwrap() // descending by betweenness
.then_with(|| {
// tie-break: lexicographically smallest edge
let edge1 = if a1 < b1 { (a1,b1) } else { (b1,a1) };
let edge2 = if a2 < b2 { (a2,b2) } else { (b2,a2) };
edge1.cmp(&edge2)
})
});
// Remove the top edge:
if let Some(((a,b),_)) = edges.first() {
remove_edge(&mut graph, a, b);
} else {
return communities;
}
}
}
fn remove_edge(graph: &mut HashMap<String, HashMap<String,f64>>, a: &str, b: &str) {
if let Some(neighbors) = graph.get_mut(a) {
neighbors.remove(b);
// Do not remove the node even if neighbors.is_empty().
}
if let Some(neighbors) = graph.get_mut(b) {
neighbors.remove(a);
// Similarly, do not remove 'b' from the graph if its neighbors are empty.
}
}
/// Display graph summary
pub fn display_graph_summary(graph: &HashMap<String, HashMap<String, f64>>) {
let n = graph.len();
let m: usize = graph.values().map(|neighbors| neighbors.len()).sum::<usize>() / 2;
let avg_degree = if n > 0 { (2.0 * m as f64) / n as f64 } else { 0.0 };
let communities = find_communities(graph);
println!("----------------[graph-summary]----------------");
println!("Number of nodes: {}", n);
println!("Number of edges: {}", m);
println!("Average degree: {:.2}", avg_degree);
println!("Number of communities: {}", communities.len());
if let Some(largest) = communities.iter().map(|c| c.len()).max() {
println!("Largest community size: {}", largest);
}
if let Some(smallest) = communities.iter().map(|c| c.len()).min() {
println!("Smallest community size: {}", smallest);
}
println!("");
}
/// Display betweenness centrality top nodes
pub fn display_top_betweenness_nodes(
node_bet: &HashMap<String, f64>,
top_n: usize
) {
println!("----------------[top-nodes-by-betweenness]----------------");
let mut v: Vec<_> = node_bet.iter().collect();
v.sort_by(|a,b| b.1.partial_cmp(a.1).unwrap());
for (i, (node, score)) in v.iter().take(top_n).enumerate() {
println!("{}. {} (betweenness={:.2})", i+1, node, score);
}
println!("");
}
#[cfg(test)]
mod correlation_network_tests {
use super::*;
#[test]
fn test_empty_input() {
let correlations: Vec<(String, String, f64)> = Vec::new();
let graph = build_correlation_graph(&correlations, 0.5);
assert!(graph.is_empty(), "Empty input should produce empty graph.");
let communities = find_communities(&graph);
assert!(communities.is_empty(), "Empty graph should have no communities.");
let centralities = compute_degree_centrality(&graph);
assert!(centralities.is_empty(), "No nodes means no centralities.");
}
#[test]
fn test_single_crate_no_edges() {
// A self-pair is the only way a single crate can appear in the input.
let correlations = vec![tuple("crateA", "crateA", 0.9)];
let graph = build_correlation_graph(&correlations, 0.5);
// The builder does not special-case self-pairs, so "crateA" may acquire a
// trivial self-edge; either way the graph contains at most one node, and
// community detection must handle it without splitting.
assert!(graph.len() <= 1, "At most one node expected.");
if let Some(neighbors) = graph.get("crateA") {
// If a self-edge got inserted, neighbors will contain 'crateA' itself.
// It's a corner case, but let's just ensure it doesn't break community detection.
assert!(neighbors.len() <= 1);
}
let communities = find_communities(&graph);
assert_eq!(communities.len(), 1, "Single node forms one community.");
assert_eq!(communities[0], vec!["crateA"], "Community should contain only crateA.");
let centralities = compute_degree_centrality(&graph);
// If no self-edge is considered, degree=0; if self-edge was inserted, degree=1.
// Either is acceptable. Let's just check the node exists.
assert!(centralities.contains_key("crateA"));
}
#[test]
fn test_two_crates_no_edge_below_threshold() {
let correlations = vec![tuple("crateA", "crateB", 0.4)];
let graph = build_correlation_graph(&correlations, 0.5);
assert!(graph.is_empty(), "No edges should form if correlation < threshold.");
let communities = find_communities(&graph);
// Nodes are only recorded when one of their edges meets the threshold, so
// the graph has no entries at all and therefore zero communities.
assert!(communities.is_empty(), "No edges and no nodes means no communities.");
}
#[test]
fn test_two_crates_with_edge() {
let correlations = vec![tuple("crateA", "crateB", 0.7)];
let graph = build_correlation_graph(&correlations, 0.7);
// Should form an edge between crateA and crateB
assert_eq!(graph.len(), 2, "Two nodes expected.");
assert!(graph.get("crateA").unwrap().contains_key("crateB"), "Edge should exist A->B.");
assert!(graph.get("crateB").unwrap().contains_key("crateA"), "Edge should exist B->A.");
let communities = find_communities(&graph);
assert_eq!(communities.len(), 1, "Single community with both crates.");
let mut comm = communities[0].clone();
comm.sort();
assert_eq!(comm, vec!["crateA", "crateB"]);
let centralities = compute_degree_centrality(&graph);
assert_eq!(centralities.get("crateA"), Some(&1));
assert_eq!(centralities.get("crateB"), Some(&1));
}
#[test]
fn test_threshold_one_requiring_perfect_correlation() {
let correlations = vec![
tuple("crateA", "crateB", 1.0),
tuple("crateA", "crateC", 0.99),
tuple("crateB", "crateC", 1.0),
];
let graph = build_correlation_graph(&correlations, 1.0);
assert_eq!(graph.len(), 3, "All crates A, B, C appear because B-C also has perfect correlation.");
// Check edges:
assert!(graph.get("crateA").unwrap().contains_key("crateB"));
// crateA->crateC should not exist because corr=0.99 < 1.0
assert!(!graph.get("crateA").unwrap().contains_key("crateC"));
assert!(graph.get("crateB").unwrap().contains_key("crateA"));
assert!(graph.get("crateB").unwrap().contains_key("crateC"));
// B and C have perfect correlation too.
// A-B and B-C both meet the 1.0 threshold, so all three crates appear as
// nodes and the two edges chain them into one community; A-C (0.99) is the
// only pair excluded.
let communities = find_communities(&graph);
assert_eq!(communities.len(), 1, "All three form one community due to two edges.");
let mut comm = communities[0].clone();
comm.sort();
assert_eq!(comm, vec!["crateA", "crateB", "crateC"]);
let centralities = compute_degree_centrality(&graph);
// A connected to B only -> degree=1
// B connected to A and C -> degree=2
// C connected to B only -> degree=1
assert_eq!(centralities.get("crateA"), Some(&1));
assert_eq!(centralities.get("crateB"), Some(&2));
assert_eq!(centralities.get("crateC"), Some(&1));
}
#[test]
fn test_threshold_zero_all_edges() {
let correlations = vec![
tuple("a", "b", 0.1),
tuple("a", "c", 0.5),
tuple("b", "c", 0.2),
tuple("c", "d", 0.9),
];
let graph = build_correlation_graph(&correlations, 0.0);
// Since threshold=0.0, all correlations form edges.
// Nodes: a,b,c,d
assert_eq!(graph.len(), 4);
// Check some edges:
assert!(graph.get("a").unwrap().contains_key("b"));
assert!(graph.get("a").unwrap().contains_key("c"));
assert!(graph.get("b").unwrap().contains_key("c"));
assert!(graph.get("c").unwrap().contains_key("d"));
let communities = find_communities(&graph);
// All nodes connected together (since all edges allowed), should form one big community.
assert_eq!(communities.len(), 1);
let mut comm = communities[0].clone();
comm.sort();
assert_eq!(comm, vec!["a", "b", "c", "d"]);
let centralities = compute_degree_centrality(&graph);
// Degrees:
// a connected to b,c -> degree=2
// b connected to a,c -> degree=2
// c connected to a,b,d -> degree=3
// d connected to c -> degree=1
assert_eq!(centralities.get("a"), Some(&2));
assert_eq!(centralities.get("b"), Some(&2));
assert_eq!(centralities.get("c"), Some(&3));
assert_eq!(centralities.get("d"), Some(&1));
}
#[test]
fn test_disconnected_graph_multiple_components() {
// Two separate subgraphs:
// Subgraph1: (x <-> y) corr=0.8
// Subgraph2: (p <-> q, q <-> r) corr=0.9
// Subgraph3: Single node s with no edges.
let correlations = vec![
tuple("x", "y", 0.8),
tuple("p", "q", 0.9),
tuple("q", "r", 0.9),
// s is isolated, no edges above threshold
tuple("s", "t", 0.4), // below threshold, no edge formed
];
let graph = build_correlation_graph(&correlations, 0.7);
// Edges formed: x-y, p-q, q-r. Nodes appear only when at least one of their
// edges meets the threshold, so s and t (corr 0.4 < 0.7) are absent.
assert_eq!(graph.len(), 5, "x, y, p, q, r appear; s and t have no qualifying edges.");
// Check edges:
assert!(graph.get("x").unwrap().contains_key("y"));
assert!(graph.get("y").unwrap().contains_key("x"));
assert!(graph.get("p").unwrap().contains_key("q"));
assert!(graph.get("q").unwrap().contains_key("p"));
assert!(graph.get("q").unwrap().contains_key("r"));
assert!(graph.get("r").unwrap().contains_key("q"));
// Communities:
let communities = find_communities(&graph);
// Expect two communities:
// 1) (x,y)
// 2) (p,q,r)
// s,t are absent entirely as they have no edges above threshold.
assert_eq!(communities.len(), 2);
let mut c1 = communities[0].clone();
let mut c2 = communities[1].clone();
c1.sort();
c2.sort();
// Sorted by size, smaller community first. (x,y) size=2, (p,q,r) size=3
assert_eq!(c1, vec!["x", "y"]);
assert_eq!(c2, vec!["p", "q", "r"]);
let centralities = compute_degree_centrality(&graph);
// Degrees:
// x-y each have degree=1
// p connected to q -> degree=1
// q connected to p,r -> degree=2
// r connected to q -> degree=1
assert_eq!(centralities.get("x"), Some(&1));
assert_eq!(centralities.get("y"), Some(&1));
assert_eq!(centralities.get("p"), Some(&1));
assert_eq!(centralities.get("q"), Some(&2));
assert_eq!(centralities.get("r"), Some(&1));
}
#[test]
fn test_duplicate_entries() {
// Suppose the same pair is listed multiple times with the same or different correlations.
let correlations = vec![
tuple("a", "b", 0.8),
tuple("a", "b", 0.85), // duplicate pair with slightly higher corr
tuple("b", "a", 0.8), // reversed order duplicate
];
let graph = build_correlation_graph(&correlations, 0.7);
// Regardless of duplicates, we should end up with a single edge a<->b.
let a_neighbors = graph.get("a").unwrap();
assert_eq!(a_neighbors.len(), 1);
assert!(a_neighbors.contains_key("b"));
let b_neighbors = graph.get("b").unwrap();
assert_eq!(b_neighbors.len(), 1);
assert!(b_neighbors.contains_key("a"));
let communities = find_communities(&graph);
assert_eq!(communities.len(), 1);
let mut comm = communities[0].clone();
comm.sort();
assert_eq!(comm, vec!["a", "b"]);
let centralities = compute_degree_centrality(&graph);
assert_eq!(centralities.get("a"), Some(&1));
assert_eq!(centralities.get("b"), Some(&1));
}
#[test]
fn test_large_random_data() {
// Just a performance or stress test scenario, we won't check exact results extensively.
// We'll just ensure it doesn't panic and produces a logically consistent result.
use rand::Rng;
let mut rng = rand::thread_rng();
let crate_names = vec!["crate1", "crate2", "crate3", "crate4", "crate5"];
let mut correlations = Vec::new();
// Generate random correlations between these crates
for i in 0..crate_names.len() {
for j in (i+1)..crate_names.len() {
let corr = rng.gen_range(0.0..1.0);
correlations.push(tuple(crate_names[i], crate_names[j], corr));
}
}
// Use a threshold of 0.5
let graph = build_correlation_graph(&correlations, 0.5);
// Check for no panic:
let communities = find_communities(&graph);
let centralities = compute_degree_centrality(&graph);
// Just sanity checks:
// All nodes that have edges above threshold should appear.
// If no edges above threshold, graph might be empty.
// If we have edges, communities should reflect actual connectivity.
// Centralities should be consistent.
for (node, neighbors) in &graph {
for neighbor in neighbors.keys() {
assert!(graph.get(neighbor).unwrap().contains_key(node), "Graph should be symmetric.");
}
}
// No specific assertion because it's random. Just ensure no panic and structures are well-formed.
}
fn tuple(a: &str, b: &str, c: f64) -> (String, String, f64) {
(a.to_string(), b.to_string(), c)
}
#[test]
fn test_empty_graph_summary() {
let graph: HashMap<String, HashMap<String, f64>> = HashMap::new();
let communities = find_communities(&graph);
assert!(communities.is_empty(), "No communities in empty graph.");
// Just ensure no panic:
// display_graph_summary doesn't return a value, we trust it to print.
// We'll not test stdout here, just correctness of logic if possible.
// We'll trust no panic occurs.
}
#[test]
fn test_girvan_newman_basic() {
// A small "bridge" scenario:
// Two clusters: (A,B) and (C,D)
// A-B corr=0.9, C-D corr=0.9, and a bridging edge B-C = 0.8
let correlations = vec![
tuple("A", "B", 0.9),
tuple("C", "D", 0.9),
tuple("B", "C", 0.8),
];
let graph = build_correlation_graph(&correlations, 0.7);
// Initially one community because B-C connects them.
let initial_communities = find_communities(&graph);
assert_eq!(initial_communities.len(), 1, "All connected initially.");
// Apply Girvan-Newman to form 2 communities.
let communities = girvan_newman_communities(graph.clone(), 2);
assert_eq!(communities.len(), 2, "Should have split into two communities after removing bridge.");
// Check which communities formed:
// Likely (A,B) and (C,D), order by size yields smallest first = (A,B) and then (C,D) or vice versa.
// Since both are size 2, sorted by size stable: We can just check that we have two size=2 communities.
for c in &communities {
assert_eq!(c.len(), 2);
}
}
#[test]
fn test_betweenness_centrality_star() {
// Star graph: center = X, leaves = A,B,C
// Edges: X-A, X-B, X-C all with corr=0.9
let correlations = vec![
tuple("X", "A", 0.9),
tuple("X", "B", 0.9),
tuple("X", "C", 0.9),
];
let graph = build_correlation_graph(&correlations, 0.7);
// X is center, shortest paths between leaves always go through X.
let (node_bet, edge_bet) = compute_betweenness_centrality(&graph);
// Check node betweenness:
// X should have highest betweenness because all shortest paths between A,B,C go via X.
// There are 3 leaves, shortest paths among leaves: A-B, B-C, A-C. All go through X.
// Each leaf pair shortest path: X is intermediary.
// So X betweenness > 0, leaves betweenness = 0.
let x_bet = node_bet.get("X").cloned().unwrap_or(0.0);
let a_bet = node_bet.get("A").cloned().unwrap_or(0.0);
let b_bet = node_bet.get("B").cloned().unwrap_or(0.0);
let c_bet = node_bet.get("C").cloned().unwrap_or(0.0);
assert!(x_bet > a_bet && x_bet > b_bet && x_bet > c_bet, "X should have highest betweenness.");
assert_eq!(a_bet, 0.0, "Leaves no betweenness in a star.");
assert_eq!(b_bet, 0.0, "Leaves no betweenness in a star.");
assert_eq!(c_bet, 0.0, "Leaves no betweenness in a star.");
// Edge betweenness: each edge X-A, X-B, X-C should have some betweenness due to shortest paths passing through them.
// It's symmetrical. Just ensure >0.
for ((u,v), val) in edge_bet.iter() {
assert!(val > &0.0, "Star edges should have >0 edge betweenness.");
assert!((u == "X" || v == "X"), "Edges should connect to X in a star.");
}
}
#[test]
fn test_girvan_newman_no_change_if_already_multiple_components() {
// If we start with multiple disconnected components, Girvan–Newman won't remove any edges.
let correlations = vec![
tuple("A", "B", 0.9), // component 1
tuple("C", "D", 0.9), // component 2
// No edges between these pairs, so we have 2 communities already.
];
let graph = build_correlation_graph(&correlations, 0.7);
let communities = girvan_newman_communities(graph.clone(), 2);
assert_eq!(communities.len(), 2, "Already at desired number of communities.");
}
#[test]
fn test_graph_summary_basic() {
// Just ensure the function runs with no panic and logic is correct.
let correlations = vec![
tuple("X", "Y", 0.8),
tuple("Y", "Z", 0.8),
];
let graph = build_correlation_graph(&correlations, 0.7);
// 3 nodes: X,Y,Z
// Edges: X-Y, Y-Z. Total edges=2. Average degree = (2*2)/3 ~1.33
// Communities: 1 big community (X,Y,Z)
let communities = find_communities(&graph);
assert_eq!(communities.len(), 1);
assert_eq!(graph.len(), 3);
let total_edges: usize = graph.values().map(|nbrs| nbrs.len()).sum::<usize>() / 2;
assert_eq!(total_edges, 2);
// We trust display_graph_summary to print correct info; no panic means success.
// Could parse stdout in a more advanced test environment, but here we rely on correctness.
}
#[test]
fn test_betweenness_top_nodes() {
// Square: A-B, B-C, C-D, D-A plus diagonal A-C:
// A--B
// |\/|
// |/\|
// D--C
// This is a fully connected structure except missing B-D edge:
// Distances are short, many equal shortest paths.
let correlations = vec![
tuple("A", "B", 0.9),
tuple("B", "C", 0.9),
tuple("C", "D", 0.9),
tuple("D", "A", 0.9),
tuple("A", "C", 0.9),
];
let graph = build_correlation_graph(&correlations, 0.7);
let (node_bet, _edge_bet) = compute_betweenness_centrality(&graph);
// Symmetric graph, betweenness should be relatively even.
// Just ensure no panic calling display_top_betweenness_nodes.
display_top_betweenness_nodes(&node_bet, 10);
// Check all nodes exist in node_bet
for n in &["A", "B", "C", "D"] {
assert!(node_bet.contains_key(*n), "All nodes should have a betweenness value.");
}
}
#[test]
fn test_girvan_newman_high_target_communities() {
// If we ask for more communities than possible, it should stop when no edges left.
// Triangle: A-B, B-C, A-C
let correlations = vec![
tuple("A", "B", 0.9),
tuple("B", "C", 0.9),
tuple("A", "C", 0.9),
];
let graph = build_correlation_graph(&correlations, 0.7);
// Initially 1 community of {A,B,C}.
// Girvan–Newman keeps removing edges, eventually yielding the three
// singleton communities (A), (B), (C).
let communities = girvan_newman_communities(graph.clone(), 5);
// 5 is more than possible, we end up with single nodes each:
assert_eq!(communities.len(), 3, "Max communities = number of nodes.");
}
}
crate::ix!();
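/// Return the sorted intersection of two date slices. Each side is first
/// collected into a `HashSet`, so duplicates collapse and every shared date
/// appears exactly once in the output.
///
/// # Example (sketch)
///
/// ```ignore
/// let d = NaiveDate::from_ymd_opt(2024, 1, 2).unwrap();
/// assert_eq!(intersect_date_ranges(&[d, d], &[d]), vec![d]);
/// ```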
pub fn intersect_date_ranges(
dates_a: &[NaiveDate],
dates_b: &[NaiveDate],
) -> Vec<NaiveDate> {
let set_a: HashSet<_> = dates_a.iter().cloned().collect();
let set_b: HashSet<_> = dates_b.iter().cloned().collect();
let mut intersection: Vec<NaiveDate> = set_a.intersection(&set_b).cloned().collect();
intersection.sort(); // Ensure the output is sorted
intersection
}
#[cfg(test)]
mod intersect_date_ranges_tests {
use super::*;
use chrono::NaiveDate;
use std::collections::HashSet;
#[test]
fn test_empty_inputs() {
let dates_a: Vec<NaiveDate> = vec![];
let dates_b: Vec<NaiveDate> = vec![];
let result = intersect_date_ranges(&dates_a, &dates_b);
assert!(result.is_empty(), "Intersection of empty inputs should be empty.");
}
#[test]
fn test_non_overlapping_date_ranges() {
let dates_a = vec![
NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
NaiveDate::from_ymd_opt(2024, 1, 2).unwrap(),
];
let dates_b = vec![
NaiveDate::from_ymd_opt(2024, 1, 3).unwrap(),
NaiveDate::from_ymd_opt(2024, 1, 4).unwrap(),
];
let result = intersect_date_ranges(&dates_a, &dates_b);
assert!(result.is_empty(), "Non-overlapping ranges should yield an empty intersection.");
}
#[test]
fn test_identical_date_ranges() {
let dates_a = vec![
NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
NaiveDate::from_ymd_opt(2024, 1, 2).unwrap(),
NaiveDate::from_ymd_opt(2024, 1, 3).unwrap(),
];
let dates_b = dates_a.clone();
let result = intersect_date_ranges(&dates_a, &dates_b);
assert_eq!(result.len(), dates_a.len(), "Intersection of identical ranges should match their length.");
assert_eq!(result, dates_a, "Intersection of identical ranges should yield the same dates.");
}
#[test]
fn test_partially_overlapping_date_ranges() {
let dates_a = vec![
NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
NaiveDate::from_ymd_opt(2024, 1, 2).unwrap(),
NaiveDate::from_ymd_opt(2024, 1, 3).unwrap(),
];
let dates_b = vec![
NaiveDate::from_ymd_opt(2024, 1, 2).unwrap(),
NaiveDate::from_ymd_opt(2024, 1, 3).unwrap(),
NaiveDate::from_ymd_opt(2024, 1, 4).unwrap(),
];
let result = intersect_date_ranges(&dates_a, &dates_b);
let expected = vec![
NaiveDate::from_ymd_opt(2024, 1, 2).unwrap(),
NaiveDate::from_ymd_opt(2024, 1, 3).unwrap(),
];
assert_eq!(result, expected, "Intersection should yield only overlapping dates.");
}
#[test]
fn test_subset_date_ranges() {
let dates_a = vec![
NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
NaiveDate::from_ymd_opt(2024, 1, 2).unwrap(),
NaiveDate::from_ymd_opt(2024, 1, 3).unwrap(),
];
let dates_b = vec![
NaiveDate::from_ymd_opt(2024, 1, 2).unwrap(),
NaiveDate::from_ymd_opt(2024, 1, 3).unwrap(),
];
let result = intersect_date_ranges(&dates_a, &dates_b);
assert_eq!(result, dates_b, "Intersection should match the smaller range if it's a subset.");
}
#[test]
fn test_disjoint_ranges_with_duplicates() {
let dates_a = vec![
NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(), // Duplicate
NaiveDate::from_ymd_opt(2024, 1, 2).unwrap(),
];
let dates_b = vec![
NaiveDate::from_ymd_opt(2024, 1, 2).unwrap(),
NaiveDate::from_ymd_opt(2024, 1, 3).unwrap(),
NaiveDate::from_ymd_opt(2024, 1, 3).unwrap(), // Duplicate
];
let result = intersect_date_ranges(&dates_a, &dates_b);
let expected = vec![NaiveDate::from_ymd_opt(2024, 1, 2).unwrap()];
assert_eq!(result, expected, "Intersection should ignore duplicates and yield correct results.");
}
}
crate::ix!();
#[derive(Clone,Debug,Serialize,Deserialize)]
pub enum DownloadTrend {
Increasing,
Decreasing,
Stable,
}
#[derive(Clone,Builder,Getters,Setters,Debug,Serialize,Deserialize)]
#[builder(setter(into))]
pub struct CrateUsageSummary {
#[getset(get = "pub", set = "pub")] crate_name: String,
#[getset(get = "pub", set = "pub")] total_downloads: i64,
#[getset(get = "pub", set = "pub")] average_daily_downloads: f64,
#[getset(get = "pub", set = "pub")] peak_daily_downloads: i64,
#[getset(get = "pub", set = "pub")] download_trend: Option<DownloadTrend>,
#[getset(get = "pub", set = "pub")] version_downloads: Vec<VersionDownload>, // Add this
}
crate::ix!();
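/// Flag outliers with a MAD-based ("modified") z-score:
/// z_i = 0.6745 * (x_i - median) / MAD, marking values where |z_i| > z_threshold.
/// This is more robust than a mean/std-dev z-score because a single spike
/// cannot inflate the scale estimate.
///
/// # Example (sketch)
///
/// ```ignore
/// // median = 100.5, MAD = 1.0; only the spike exceeds |z| = 3.5.
/// let flags = detect_outliers_zscore(&[100, 101, 99, 10_000], 3.5);
/// assert_eq!(flags, vec![false, false, false, true]);
/// ```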
pub fn detect_outliers_zscore(values: &[i64], z_threshold: f64) -> Vec<bool> {
if values.is_empty() {
return Vec::new();
}
// Compute median
let mut sorted = values.to_vec();
sorted.sort();
let median = if sorted.len() % 2 == 1 {
sorted[sorted.len() / 2] as f64
} else {
let mid = sorted.len() / 2;
(sorted[mid - 1] as f64 + sorted[mid] as f64) / 2.0
};
// Compute absolute deviations from median
let abs_dev: Vec<f64> = values.iter().map(|&x| ((x as f64) - median).abs()).collect();
// Compute MAD (median of absolute deviations)
let mut abs_dev_sorted = abs_dev.clone();
abs_dev_sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
let mad = if abs_dev_sorted.len() % 2 == 1 {
abs_dev_sorted[abs_dev_sorted.len() / 2]
} else {
let mid = abs_dev_sorted.len() / 2;
(abs_dev_sorted[mid - 1] + abs_dev_sorted[mid]) / 2.0
};
// If MAD is essentially zero (at least half the values equal the median), flag nothing
if mad < 1e-12 {
return vec![false; values.len()];
}
// Compute MAD-based z-score and detect outliers
let c = 0.6745; // consistency constant: makes the MAD-based score comparable to a standard z-score under normality
values.iter().map(|&x| {
let z = c * ((x as f64) - median) / mad;
z.abs() > z_threshold
}).collect()
}
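// Worked example (hypothetical numbers): for values [100, 101, 99, 10000],
// median = 100.5, absolute deviations = [0.5, 0.5, 1.5, 9899.5], MAD = 1.0,
// so the modified z-score of 10000 is 0.6745 * 9899.5 / 1.0 ≈ 6677, far above
// any sensible threshold, while the other three points score at most ~1.01.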
pub fn remove_outliers(values: &[i64], outliers: &[bool]) -> Vec<i64> {
values.iter()
.zip(outliers.iter())
.filter_map(|(&val, &is_outlier)| if !is_outlier { Some(val) } else { None })
.collect()
}
pub fn downweight_outliers(values: &[i64], outliers: &[bool], weight: f64) -> Vec<f64> {
values.iter()
.zip(outliers.iter())
.map(|(&val, &is_outlier)| {
if is_outlier {
(val as f64) * weight
} else {
val as f64
}
}).collect()
}
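// A minimal end-to-end sketch with made-up numbers, showing the two handling
// strategies side by side:
//
//     let daily = vec![120, 118, 9_000, 122];
//     let flags = detect_outliers_zscore(&daily, 3.5);          // [false, false, true, false]
//     let dropped = remove_outliers(&daily, &flags);            // [120, 118, 122]
//     let softened = downweight_outliers(&daily, &flags, 0.1);  // 9_000 -> 900.0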
#[cfg(test)]
mod outlier_detection_tests {
use super::*;
#[test]
fn test_empty_input() {
let values: Vec<i64> = Vec::new();
let outliers = detect_outliers_zscore(&values, 3.0);
assert!(outliers.is_empty(), "No data means no outliers.");
}
#[test]
fn test_no_outliers_uniform_data() {
let values = vec![100,100,100,100,100];
let outliers = detect_outliers_zscore(&values, 3.0);
assert_eq!(outliers, vec![false;5], "All identical data means no outliers.");
}
#[test]
fn test_simple_outlier_detection() {
// Data mostly around 100, but one huge spike at 10000
let values = vec![100,101,99,102,98,10000];
let outliers = detect_outliers_zscore(&values, 3.0);
assert_eq!(outliers.len(), 6);
assert!(outliers[5], "The spike at 10000 should be detected as outlier.");
for i in 0..5 {
assert!(!outliers[i], "Normal values near median not outliers.");
}
}
#[test]
fn test_remove_outliers() {
let values = vec![10,20,30,10000,40,50];
let outliers = detect_outliers_zscore(&values, 3.0);
let cleaned = remove_outliers(&values, &outliers);
// Expect to remove the huge spike at 10000
assert!(!cleaned.contains(&10000), "Should have removed the outlier.");
assert_eq!(cleaned, vec![10,20,30,40,50]);
}
#[test]
fn test_downweight_outliers() {
let values = vec![10,20,30,5000,40,50];
let outliers = detect_outliers_zscore(&values, 3.0);
let adjusted = downweight_outliers(&values, &outliers, 0.1);
let outlier_indices: Vec<_> = outliers.iter().enumerate().filter_map(|(i,&o)| if o {Some(i)} else {None}).collect();
assert_eq!(outlier_indices.len(), 1, "Should detect exactly one outlier");
let idx = outlier_indices[0];
assert!((adjusted[idx] - 500.0).abs()<1e-9, "Outlier should be down-weighted by factor 0.1");
for (i, &val) in values.iter().enumerate() {
if i != idx {
assert_eq!(adjusted[i], val as f64, "Non-outliers should remain unchanged.");
}
}
}
#[test]
fn test_threshold_sensitivity() {
let values = vec![100,102,98,95,105];
let outliers_strict = detect_outliers_zscore(&values, 1.0);
let outliers_loose = detect_outliers_zscore(&values, 3.0);
let strict_count = outliers_strict.iter().filter(|&&o| o).count();
let loose_count = outliers_loose.iter().filter(|&&o| o).count();
assert!(strict_count >= loose_count, "Stricter threshold should produce more or equal outliers.");
}
}
crate::ix!();
pub fn analyze_usage(crate_name: &str, version_downloads: Vec<VersionDownload>) -> CrateUsageSummary {
// Aggregate downloads by date
let mut daily_downloads: HashMap<NaiveDate, i64> = HashMap::new();
for download in version_downloads.iter() {
*daily_downloads.entry(*download.date()).or_insert(0) += download.downloads();
}
let total_downloads: i64 = daily_downloads.values().sum();
let average_daily_downloads = if daily_downloads.is_empty() {
0.0
} else {
total_downloads as f64 / daily_downloads.len() as f64
};
let peak_daily_downloads = *daily_downloads.values().max().unwrap_or(&0);
// Calculate trend (simple: increasing, decreasing, or stable)
let mut trend = None;
let mut diffs = Vec::new();
let mut sorted_days: Vec<_> = daily_downloads.into_iter().collect();
sorted_days.sort_by_key(|&(date, _)| date);
for i in 1..sorted_days.len() {
diffs.push(sorted_days[i].1 - sorted_days[i - 1].1);
}
if !diffs.is_empty() {
let positive_diffs = diffs.iter().filter(|&&d| d > 0).count();
let negative_diffs = diffs.iter().filter(|&&d| d < 0).count();
trend = if positive_diffs > negative_diffs {
Some(DownloadTrend::Increasing)
} else if negative_diffs > positive_diffs {
Some(DownloadTrend::Decreasing)
} else {
Some(DownloadTrend::Stable)
};
}
// Use the builder to construct the summary
CrateUsageSummaryBuilder::default()
.crate_name(crate_name.to_string())
.total_downloads(total_downloads)
.average_daily_downloads(average_daily_downloads)
.peak_daily_downloads(peak_daily_downloads)
.download_trend(trend)
.version_downloads(version_downloads)
.build()
.expect("Failed to build CrateUsageSummary") // Handle errors from builder
}
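// A small, self-contained sanity sketch (hypothetical data; the crate name
// "demo-crate" is made up): three rising daily totals should classify as
// DownloadTrend::Increasing.
#[cfg(test)]
mod analyze_usage_sketch {
use super::*;
#[test]
fn rising_series_is_increasing() {
let downloads: Vec<VersionDownload> = [(1u32, 10i64), (2, 12), (3, 15)]
.iter()
.map(|&(day, count)| {
VersionDownloadBuilder::default()
.version(1_i64)
.downloads(count)
.date(chrono::NaiveDate::from_ymd_opt(2024, 1, day).unwrap())
.build()
.unwrap()
})
.collect();
let summary = analyze_usage("demo-crate", downloads);
assert_eq!(*summary.total_downloads(), 37);
assert!(matches!(summary.download_trend(), Some(DownloadTrend::Increasing)));
}
}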
crate::ix!();
pub fn align_and_normalize_data(
version_downloads: &[VersionDownload],
full_date_range: &[NaiveDate],
) -> Vec<i64> {
let downloads_map: HashMap<NaiveDate, i64> = version_downloads
.iter()
.map(|d| (*d.date(), *d.downloads())) // Dereference both the date and the downloads
.collect();
full_date_range
.iter()
.map(|date| *downloads_map.get(date).unwrap_or(&0)) // Fill missing dates with 0
.collect()
}
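// Worked example (made-up dates): downloads of 100 on Dec 1 and 50 on Dec 3,
// aligned to the range [Dec 1, Dec 2, Dec 3], produce [100, 0, 50]; the gap
// is zero-filled so both series index the same calendar days.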
pub fn debug_alignment(
crate_a: &str,
crate_b: &str,
aligned_a: &[i64],
aligned_b: &[i64],
) {
println!("Aligned data for {} and {}:", crate_a, crate_b);
println!(" Aligned A: {:?}", aligned_a);
println!(" Aligned B: {:?}", aligned_b);
}
#[cfg(test)]
mod alignment_and_normalization_tests {
use super::*;
use chrono::NaiveDate;
#[test]
fn test_empty_input() {
let version_downloads = [];
let full_date_range = [];
let result = align_and_normalize_data(&version_downloads, &full_date_range);
assert_eq!(result, Vec::<i64>::new(), "Empty input should produce empty output.");
}
#[test]
fn test_full_date_range_matches_data() {
let version_downloads = [
VersionDownloadBuilder::default()
.version(1_i64)
.downloads(100_i64)
.date(NaiveDate::from_ymd_opt(2024, 12, 1).unwrap())
.build()
.unwrap(),
VersionDownloadBuilder::default()
.version(1_i64)
.downloads(200_i64)
.date(NaiveDate::from_ymd_opt(2024, 12, 2).unwrap())
.build()
.unwrap(),
];
let full_date_range = vec![
NaiveDate::from_ymd_opt(2024, 12, 1).unwrap(),
NaiveDate::from_ymd_opt(2024, 12, 2).unwrap(),
];
let result = align_and_normalize_data(&version_downloads, &full_date_range);
assert_eq!(result, vec![100, 200], "All dates should align correctly.");
}
#[test]
fn test_full_date_range_extends_beyond_data() {
let version_downloads = [
VersionDownloadBuilder::default()
.version(1_i64)
.downloads(200_i64)
.date(NaiveDate::from_ymd_opt(2024, 12, 2).unwrap())
.build()
.unwrap(),
];
let full_date_range = vec![
NaiveDate::from_ymd_opt(2024, 12, 1).unwrap(),
NaiveDate::from_ymd_opt(2024, 12, 2).unwrap(),
NaiveDate::from_ymd_opt(2024, 12, 3).unwrap(),
];
let result = align_and_normalize_data(&version_downloads, &full_date_range);
assert_eq!(
result,
vec![0, 200, 0],
"Missing dates should be filled with 0."
);
}
#[test]
fn test_full_date_range_subset_of_data() {
let version_downloads = [
VersionDownloadBuilder::default()
.version(1_i64)
.downloads(100_i64)
.date(NaiveDate::from_ymd_opt(2024, 12, 1).unwrap())
.build()
.unwrap(),
VersionDownloadBuilder::default()
.version(1_i64)
.downloads(200_i64)
.date(NaiveDate::from_ymd_opt(2024, 12, 2).unwrap())
.build()
.unwrap(),
VersionDownloadBuilder::default()
.version(1_i64)
.downloads(300_i64)
.date(NaiveDate::from_ymd_opt(2024, 12, 3).unwrap())
.build()
.unwrap(),
];
let full_date_range = vec![
NaiveDate::from_ymd_opt(2024, 12, 2).unwrap(),
NaiveDate::from_ymd_opt(2024, 12, 3).unwrap(),
];
let result = align_and_normalize_data(&version_downloads, &full_date_range);
assert_eq!(
result,
vec![200, 300],
"Only values within the full_date_range should be included."
);
}
#[test]
fn test_duplicate_dates_in_input() {
let version_downloads = [
VersionDownloadBuilder::default()
.version(1_i64)
.downloads(100_i64)
.date(NaiveDate::from_ymd_opt(2024, 12, 2).unwrap())
.build()
.unwrap(),
VersionDownloadBuilder::default()
.version(1_i64)
.downloads(200_i64)
.date(NaiveDate::from_ymd_opt(2024, 12, 2).unwrap())
.build()
.unwrap(), // Duplicate date
];
let full_date_range = vec![
NaiveDate::from_ymd_opt(2024, 12, 2).unwrap(),
];
let result = align_and_normalize_data(&version_downloads, &full_date_range);
assert_eq!(
result,
vec![200],
"The most recent value for a duplicate date should be used."
);
}
}
crate::ix!();
pub async fn configure_directory() -> Result<PathBuf, CrateActivityError> {
let config_dir = dirs::home_dir()
.map(|p| p.join(".published-crates"))
.unwrap_or_else(|| PathBuf::from(".published-crates"));
ensure_config_structure(&config_dir).await?;
Ok(config_dir)
}
crate::ix!();
pub fn compute_time_lag_correlations(
summaries: &[CrateUsageSummary],
max_lag: i32
) -> Vec<(String, String, i32, f64)> {
let mut results = Vec::new();
let crate_data: HashMap<String, Vec<i64>> = summaries.iter().map(|s| {
(s.crate_name().clone(), s.version_downloads().iter().map(|d| *d.downloads()).collect())
}).collect();
let crate_names: Vec<_> = crate_data.keys().cloned().collect();
for i in 0..crate_names.len() {
for j in (i+1)..crate_names.len() {
let name_a = &crate_names[i];
let name_b = &crate_names[j];
let series_a = &crate_data[name_a];
let series_b = &crate_data[name_b];
let mut best_corr: f64 = 0.0;
let mut best_lag = 0;
let mut found_any = false;
for lag in -max_lag..=max_lag {
if let Some((xs, ys)) = align_for_lag(series_a, series_b, lag) {
let corr: f64 = pearson_correlation_i64(&xs, &ys);
if !found_any {
best_corr = corr;
best_lag = lag;
found_any = true;
} else {
let current_abs = corr.abs();
let best_abs = best_corr.abs();
if current_abs > best_abs {
// Strictly better correlation
best_corr = corr;
best_lag = lag;
} else if (current_abs - best_abs).abs() < 1e-12 {
// Tie in absolute correlation
let current_distance = lag.abs();
let best_distance = best_lag.abs();
if current_distance < best_distance {
// Closer to zero wins
best_corr = corr;
best_lag = lag;
} else if current_distance == best_distance {
// Still tied: pick negative lag if available
if lag < best_lag {
best_corr = corr;
best_lag = lag;
}
}
}
}
}
}
if !found_any {
// No valid alignment
results.push((name_a.clone(), name_b.clone(), 0, 0.0));
} else {
results.push((name_a.clone(), name_b.clone(), best_lag, best_corr));
}
}
}
results
}
/// Align two time series for a given lag, pairing A[t] with B[t - lag].
/// For lag > 0, A's later values are compared with B's earlier values
/// (B leads A by `lag` days); for lag < 0, A's earlier values are compared
/// with B's later values (A leads B by `|lag|` days).
fn align_for_lag(
a: &[i64],
b: &[i64],
lag: i32
) -> Option<(Vec<i64>, Vec<i64>)> {
let n = a.len();
if n == 0 || b.len() != n {
return None;
}
if lag == 0 {
// No lead/lag
return Some((a.to_vec(), b.to_vec()));
} else if lag > 0 {
// lag > 0: B leads A by `lag` days, i.e. A(t) tracks B(t - lag).
// Align A[lag..] with B[..n-lag].
let shift = lag as usize;
if shift >= n {
return None;
}
let a_slice = &a[shift..];
let b_slice = &b[..(n - shift)];
if a_slice.len() == b_slice.len() && !a_slice.is_empty() {
Some((a_slice.to_vec(), b_slice.to_vec()))
} else {
None
}
} else {
// lag < 0: A leads B by |lag| days, i.e. B(t) tracks A(t - |lag|).
// Align A[..n-|lag|] with B[|lag|..].
let shift = (-lag) as usize;
if shift >= n {
return None;
}
let a_slice = &a[..(n - shift)];
let b_slice = &b[shift..];
if a_slice.len() == b_slice.len() && !a_slice.is_empty() {
Some((a_slice.to_vec(), b_slice.to_vec()))
} else {
None
}
}
}
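// Worked example (hypothetical series): with a = [0, 10, 20, 30] and
// b = [10, 20, 30, 40], lag = 1 pairs a[1..] = [10, 20, 30] with
// b[..3] = [10, 20, 30], a perfect match, because b shows today what a will
// show tomorrow (b leads a by one day).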
fn pearson_correlation_i64(x: &[i64], y: &[i64]) -> f64 {
if x.len() != y.len() || x.is_empty() {
return 0.0;
}
pearson_correlation(x, y)
}
pub fn display_time_lag_correlations(results: &[(String, String, i32, f64)]) {
println!("----------------[time-lag-correlations]----------------");
// Sort by absolute correlation descending
let mut sorted = results.to_vec();
sorted.sort_by(|a,b| b.3.abs().partial_cmp(&a.3.abs()).unwrap());
for (a, b, lag, corr) in sorted.iter().take(10) {
println!("{} - {}: best lag={} correlation={:.3}", a, b, lag, corr);
}
println!("");
}
#[cfg(test)]
mod time_lag_correlations_tests {
use super::*;
fn make_summary(crate_name: &str, downloads: &[i64]) -> CrateUsageSummary {
// Create VersionDownload mocks
let mut version_downloads = Vec::new();
for (i, &d) in downloads.iter().enumerate() {
let date = chrono::NaiveDate::from_ymd_opt(2024,1,(i+1) as u32).unwrap();
version_downloads.push(VersionDownloadBuilder::default()
.version(1_i64)
.downloads(d)
.date(date)
.build().unwrap());
}
let total_downloads: i64 = downloads.iter().sum();
let avg_daily = total_downloads as f64 / downloads.len() as f64;
let peak = *downloads.iter().max().unwrap();
CrateUsageSummaryBuilder::default()
.crate_name(crate_name.to_string())
.total_downloads(total_downloads)
.average_daily_downloads(avg_daily)
.peak_daily_downloads(peak)
.download_trend(Some(DownloadTrend::Stable))
.version_downloads(version_downloads)
.build()
.unwrap()
}
#[test]
fn test_no_data() {
let summaries: Vec<CrateUsageSummary> = Vec::new();
let results = compute_time_lag_correlations(&summaries, 7);
assert!(results.is_empty(), "No crates means no results.");
}
#[test]
fn test_single_crate() {
// Single crate => no pairs
let s = make_summary("crateA", &[10,20,30]);
let summaries = vec![s];
let results = compute_time_lag_correlations(&summaries, 7);
assert!(results.is_empty(), "One crate means no pairs.");
}
#[test]
fn test_two_crates_no_lag() {
// Perfect correlation without lag
let a = make_summary("A", &[1,2,3,4,5]);
let b = make_summary("B", &[1,2,3,4,5]);
let summaries = vec![a,b];
let results = compute_time_lag_correlations(&summaries, 2);
assert_eq!(results.len(),1);
let (ca, cb, lag, corr) = &results[0];
assert!(corr.abs() > 0.999, "Perfect correlation");
assert_eq!(*lag, 0, "Best lag is zero.");
let mut crates = vec![ca.as_str(), cb.as_str()];
crates.sort();
assert_eq!(crates, vec!["A","B"]);
}
#[test]
fn test_no_correlation_any_lag() {
// A: 1,1,1,1,1 (constant, so no lag can produce a meaningful correlation)
// B: 10,9,5,2,0
let a = make_summary("A", &[1,1,1,1,1]);
let b = make_summary("B", &[10,9,5,2,0]);
let summaries = vec![a,b];
let results = compute_time_lag_correlations(&summaries, 3);
let (_,_, lag, corr) = &results[0];
// No correlation at all, best corr might remain near 0 and best_lag=0 by default.
assert!(*corr < 0.5, "No significant correlation");
assert_eq!(*lag, 0, "No correlation means no particular lag stands out.");
}
#[test]
fn test_tie_same_abs_correlation_prefer_closer_to_zero() {
// Identical linear series: every lag in range yields the same perfect correlation.
let a = dummy_summary("A", &[1,2,3,4,5]);
let b = dummy_summary("B", &[1,2,3,4,5]); // Identical
let results = compute_time_lag_correlations(&[a,b], 2);
assert_eq!(results.len(),1);
let (_,_, lag, corr) = &results[0];
// lag=0 and lag=2 might both be perfect correlation
// According to rules, tie means choose closer to zero => lag=0
assert!((corr.abs()-1.0).abs()<1e-12);
assert_eq!(*lag, 0, "Tie broken by choosing lag closer to zero");
}
#[test]
fn test_single_best_lag() {
let a = dummy_summary("A", &[10,20,30,40,50]);
let b = dummy_summary("B", &[20,30,40,50,100]);
let results = compute_time_lag_correlations(&[a,b], 2);
assert_eq!(results.len(), 1);
let (_,_, lag, corr) = &results[0];
assert!((corr.abs()-1.0).abs() < 1e-12, "Expect perfect correlation at best lag");
assert_eq!(*lag, 1, "Should choose lag=1 for maximum correlation");
}
#[test]
fn test_tie_equal_distances_pick_negative() {
// A=[10,20,30], B=[20,30,41]
// ±1 lag yield perfect correlation of 1.0, lag=0 <1.0 correlation.
// Tie at ±1 chooses negative lag.
let a = dummy_summary("A", &[10,20,30]);
let b = dummy_summary("B", &[20,30,41]);
let results = compute_time_lag_correlations(&[a,b], 1);
let (_,_, lag, corr) = &results[0];
assert!((corr.abs()-1.0).abs() < 1e-12, "Expect perfect correlation at ±1");
assert_eq!(*lag, -1, "Tie between ±1 broken by choosing negative lag");
}
}
crate::ix!();
error_tree!{
pub enum PcaError {
NoActivityDataAvailable,
PcaDataLengthMismatch {
expected_num_elements: usize,
found_num_elements: usize,
}
}
pub enum CrateActivityError {
Reqwest(reqwest::Error),
Serde(serde_json::Error),
Io(std::io::Error),
ShapeError(ndarray::ShapeError),
HierarchicalClusteringError(HierarchicalClusteringError),
PcaError(PcaError),
}
}
crate::ix!();
pub fn compute_pairwise_correlations(
summaries: &[CrateUsageSummary],
) -> Vec<(String, String, f64)> {
let mut correlations = Vec::new();
for i in 0..summaries.len() {
for j in (i + 1)..summaries.len() {
let crate_a = summaries[i].crate_name();
let crate_b = summaries[j].crate_name();
let downloads_a = summaries[i].version_downloads();
let downloads_b = summaries[j].version_downloads();
// Extract and intersect date ranges
let dates_a: Vec<_> = downloads_a.iter().map(|d| *d.date()).collect();
let dates_b: Vec<_> = downloads_b.iter().map(|d| *d.date()).collect();
let common_dates = intersect_date_ranges(&dates_a, &dates_b);
// Align data to the common date range
let aligned_a = align_and_normalize_data(downloads_a, &common_dates);
let aligned_b = align_and_normalize_data(downloads_b, &common_dates);
// Skip if either dataset lacks variance
if !has_significant_variance(&aligned_a) || !has_significant_variance(&aligned_b) {
continue;
}
// Compute correlation
let correlation = pearson_correlation(&aligned_a, &aligned_b);
correlations.push((crate_a.clone(), crate_b.clone(), correlation));
}
}
correlations
}
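// Sketch of the per-pair pipeline on hypothetical data: if crate A has data
// for Jan 1-3 and crate B for Jan 2-4, the common dates are Jan 2-3; both
// series are aligned to those two days, pairs whose aligned series lack
// variance are skipped, and Pearson correlation is computed on the rest.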
pub fn debug_correlation(crate_a: &str, crate_b: &str, correlation: f64) {
println!("Correlation for {} and {}: {:.4}", crate_a, crate_b, correlation);
}
use crate_activity::*;
#[tokio::main]
async fn main() -> Result<(),CrateActivityError> {
let cli = CrateActivityCli::read_command_line();
crate_activity_main(&cli).await?;
Ok(())
}
crate::ix!();
pub const DEFAULT_USER_AGENT: &str = "crate-activity-bot/1.0 (contact@example.com)";
// Read crate names from crate_list.txt inside the config directory (e.g. ~/.published-crates).
pub async fn read_crate_list(config_dir: &Path) -> Vec<String> {
let crate_list_file = config_dir.join("crate_list.txt");
if let Ok(file) = File::open(&crate_list_file).await {
let mut lines = BufReader::new(file).lines();
let mut crate_list = Vec::new();
while let Some(line) = lines.next_line().await.unwrap_or(None) {
let trimmed = line.trim();
if !trimmed.is_empty() {
crate_list.push(trimmed.to_string());
}
}
crate_list
} else {
eprintln!("Warning: Could not find {}, using default crate list.", crate_list_file.display());
vec![
"serde".to_string(),
"tokio".to_string(),
]
}
}
pub async fn read_user_agent(config_dir: &Path) -> String {
let user_agent_file = config_dir.join("user_agent.txt");
if let Ok(contents) = fs::read_to_string(&user_agent_file).await {
contents.trim().to_string()
} else {
eprintln!(
"Warning: Could not find {}, using default user agent.",
user_agent_file.display()
);
DEFAULT_USER_AGENT.to_string()
}
}
pub async fn ensure_config_structure(config_dir: &Path) -> io::Result<()> {
fs::create_dir_all(config_dir.join("cache")).await?;
if !config_dir.join("crate_list.txt").exists() {
fs::write(config_dir.join("crate_list.txt"), "serde\ntokio\n").await?;
}
if !config_dir.join("user_agent.txt").exists() {
fs::write(config_dir.join("user_agent.txt"), DEFAULT_USER_AGENT).await?;
}
Ok(())
}
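// Resulting layout under the config directory (e.g. ~/.published-crates):
//
//   crate_list.txt   one crate name per line (blank lines ignored)
//   user_agent.txt   User-Agent string sent with crates.io requests
//   cache/           cached API responses, bypassed with --ignore-cache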
crate::ix!();
#[derive(Debug, Getters)]
pub struct CrateActivityData {
#[getset(get = "pub")]
summaries: Vec<CrateUsageSummary>,
#[getset(get = "pub")]
interval_downloads_1d: HashMap<String, i64>,
#[getset(get = "pub")]
interval_downloads_3d: HashMap<String, i64>,
#[getset(get = "pub")]
interval_downloads_7d: HashMap<String, i64>,
}
#[tracing::instrument(level = "info", skip_all)]
pub async fn gather_crate_activity_data(
ignore_cache: bool,
crate_names: &[String],
user_agent: &str,
config_dir: &Path,
one_day_ago: NaiveDate,
three_days_ago: NaiveDate,
seven_days_ago: NaiveDate,
) -> Result<CrateActivityData, CrateActivityError> {
use futures::{StreamExt};
tracing::info!(
"Gathering crate activity data for {} crates (ignore_cache={})",
crate_names.len(),
ignore_cache
);
// We'll limit concurrency to avoid overwhelming crates.io.
let concurrency_limit = 8usize;
// Create a stream of futures (one for each crate).
let crate_fetches = futures::stream::iter(crate_names.iter().map(|crate_name| {
let crate_name = crate_name.clone();
let ua = user_agent.to_string();
let cfg_dir = config_dir.to_path_buf();
async move {
tracing::debug!("Fetching usage for crate '{}'", crate_name);
match fetch_usage(ignore_cache, &ua, &cfg_dir, &crate_name).await {
Ok(Some(response)) => {
tracing::info!("Successfully fetched usage for crate '{}'", crate_name);
Some((crate_name, response))
},
Ok(None) => {
tracing::warn!("No data for crate '{}'", crate_name);
None
},
Err(e) => {
tracing::error!("Failed to fetch data for '{}': {:?}", crate_name, e);
None
}
}
}
}))
.buffer_unordered(concurrency_limit);
// Collect all results in parallel.
let results: Vec<Option<(String, CrateResponse)>> = crate_fetches.collect().await;
let mut summaries = Vec::new();
let mut interval_downloads_1d = HashMap::new();
let mut interval_downloads_3d = HashMap::new();
let mut interval_downloads_7d = HashMap::new();
// Process the completed fetches.
for item in results {
if let Some((crate_name, response)) = item {
let summary = analyze_usage(&crate_name, response.version_downloads().to_vec());
summaries.push(summary);
let downloads_last_1d: i64 = response
.version_downloads()
.iter()
.filter(|d| *d.date() >= one_day_ago)
.map(|d| d.downloads())
.sum();
let downloads_last_3d: i64 = response
.version_downloads()
.iter()
.filter(|d| *d.date() >= three_days_ago)
.map(|d| d.downloads())
.sum();
let downloads_last_7d: i64 = response
.version_downloads()
.iter()
.filter(|d| *d.date() >= seven_days_ago)
.map(|d| d.downloads())
.sum();
interval_downloads_1d.insert(crate_name.clone(), downloads_last_1d);
interval_downloads_3d.insert(crate_name.clone(), downloads_last_3d);
interval_downloads_7d.insert(crate_name.clone(), downloads_last_7d);
}
}
tracing::info!(
"Collected activity data for {} crates (out of {} requested).",
summaries.len(),
crate_names.len()
);
Ok(CrateActivityData {
summaries,
interval_downloads_1d,
interval_downloads_3d,
interval_downloads_7d,
})
}
crate::ix!();
pub fn display_dendrogram(dendrogram: &Dendrogram) {
println!("----------------[hierarchical-clustering-dendrogram]----------------");
fn print_node(node: &Dendrogram, indent: usize) {
let prefix = " ".repeat(indent);
match node {
Dendrogram::Leaf { crate_name } => {
println!("{}- {}", prefix, crate_name);
}
Dendrogram::Internal { left, right, distance } => {
println!("{}(distance: {:.2})", prefix, distance);
print_node(left, indent + 2);
print_node(right, indent + 2);
}
}
}
print_node(dendrogram, 0);
}
crate::ix!();
pub fn perform_pca(crate_activity: &HashMap<String, Vec<i64>>) -> Result<(), PcaError> {
tracing::info!("Starting PCA analysis on crate activity data...");
// Prepare the data matrix
let data_matrix = prepare_data_matrix(crate_activity)?;
// Standardize the matrix
let standardized_matrix = standardize_matrix(&data_matrix)?;
// Compute the covariance matrix
let covariance_matrix = compute_covariance_matrix(&standardized_matrix);
// Perform eigen decomposition via the shared helper
let (eigenvalues, eigenvectors) = perform_eigen_decomposition(&covariance_matrix);
// Display results (names sorted to match the row order used in prepare_data_matrix)
let mut crate_names: Vec<String> = crate_activity.keys().cloned().collect();
crate_names.sort();
display_pca_results(crate_names, eigenvalues, eigenvectors);
Ok(())
}
fn prepare_data_matrix(crate_activity: &std::collections::HashMap<String, Vec<i64>>)
-> Result<ndarray::Array2<f64>, PcaError>
{
let num_days = crate_activity.values().map(|v| v.len()).max().unwrap_or(0);
if num_days == 0 {
return Err(PcaError::NoActivityDataAvailable);
}
// Collect and sort crate names to ensure stable ordering
let mut crate_names: Vec<_> = crate_activity.keys().cloned().collect();
crate_names.sort();
let num_crates = crate_activity.len();
let mut data = Vec::with_capacity(num_crates * num_days);
// Fill rows in sorted crate-name order so the matrix layout is deterministic
for crate_name in &crate_names {
let crate_data = &crate_activity[crate_name];
// Pad with zeros if shorter than num_days
for &value in crate_data.iter().chain(std::iter::repeat(&0).take(num_days - crate_data.len())) {
data.push(value as f64);
}
}
let found_num_elements = data.len();
ndarray::Array2::from_shape_vec((num_crates, num_days), data)
.map_err(|_| PcaError::PcaDataLengthMismatch {
expected_num_elements: num_crates * num_days,
found_num_elements,
})
}
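// Worked example (hypothetical input): {"crateA": [100, 200, 300], "crateB":
// [50, 75]} becomes a 2x3 matrix with rows in name order: [100, 200, 300]
// and [50, 75, 0], the shorter series padded with zeros on the right.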
fn perform_eigen_decomposition(covariance_matrix: &Array2<f64>) -> (Vec<f64>, Array2<f64>) {
let n = covariance_matrix.nrows(); // Assuming square matrix
let covariance_dmatrix = DMatrix::from_row_slice(
n,
n,
covariance_matrix.as_slice().unwrap(),
);
let eigen = covariance_dmatrix.symmetric_eigen();
// Convert eigenvalues to Vec<f64>
let eigenvalues: Vec<f64> = eigen.eigenvalues.as_slice().to_vec();
// Convert eigenvectors to Array2<f64>
let eigenvectors = Array2::from_shape_vec((n, n), eigen.eigenvectors.as_slice().to_vec())
.expect("Failed to convert eigenvectors to Array2");
(eigenvalues, eigenvectors)
}
fn standardize_matrix(matrix: &ndarray::Array2<f64>) -> Result<ndarray::Array2<f64>, PcaError> {
let mut standardized_matrix = matrix.clone();
for mut column in standardized_matrix.columns_mut() {
let mean = column.mean().unwrap_or(0.0);
let std_dev = column.std(0.0);
if std_dev.abs() < 1e-12 {
// Constant column: set all values to zero.
for val in column.iter_mut() {
*val = 0.0;
}
} else {
column.mapv_inplace(|x| (x - mean) / std_dev);
}
}
Ok(standardized_matrix)
}
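// Worked example: a column [1, 2, 3] has mean 2 and population standard
// deviation sqrt(2/3) ≈ 0.8165, so it standardizes to roughly
// [-1.2247, 0.0, 1.2247]; a constant column such as [5, 5, 5] is zeroed out.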
fn compute_covariance_matrix(matrix: &Array2<f64>) -> Array2<f64> {
matrix.t().dot(matrix) / (matrix.nrows() as f64)
}
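// For the standardized matrix X with n rows, this is the population
// covariance C = (1/n) * X^T * X, a symmetric ncols x ncols matrix; with
// unit-variance columns its diagonal entries are 1 (zeroed constant columns
// excepted) and its off-diagonal entries are Pearson correlations.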
fn display_pca_results(crate_names: Vec<String>, eigenvalues: Vec<f64>, eigenvectors: Array2<f64>) {
println!("Explained variance by significant principal components:");
let total_variance: f64 = eigenvalues.iter().sum();
let significant_components: Vec<_> = eigenvalues
.iter()
.enumerate()
.filter(|(_, &eigenvalue)| (eigenvalue / total_variance) * 100.0 >= 3.0)
.take(10)
.collect();
for (i, &eigenvalue) in &significant_components {
println!(
"Component {}: {:.2}% of total variance",
i + 1,
(eigenvalue / total_variance) * 100.0
);
}
println!("\nTop contributing crates to significant principal components:");
for (i, &eigenvalue) in &significant_components {
let component = eigenvectors.column(*i); // `i` is borrowed from significant_components, hence the deref
let mut contributions: Vec<(String, f64)> = crate_names
.iter()
.zip(component.iter())
.map(|(crate_name, &weight)| (crate_name.clone(), weight.abs()))
.collect();
contributions.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
println!("Component {}: Top crates", i + 1);
for (crate_name, weight) in contributions.iter().take(5) {
println!(" {:>6.2} {}", weight, crate_name);
}
}
}
#[cfg(test)]
mod pca_tests {
use super::*; // Adjust as needed if PCA code is in another module.
use std::collections::HashMap;
use ndarray::array;
// Helper function to create crate activity data from a slice of tuples.
// (crate_name, downloads) pairs.
fn create_crate_activity_data(data: &[(&str, &[i64])]) -> HashMap<String, Vec<i64>> {
let mut map = HashMap::new();
for (name, downloads) in data.iter() {
map.insert((*name).to_string(), downloads.to_vec());
}
map
}
#[test]
fn test_display_pca_results() {
// This function primarily prints output. We'll just ensure it doesn't panic.
let eigenvalues = vec![3.0, 2.0, 1.0];
let eigenvectors = array![
[0.577350269, 0.707106781, 0.408248290],
[0.577350269, 0.0, 0.816496580],
[0.577350269, -0.707106781, 0.408248290]
];
let crate_names = vec!["crateA".to_string(), "crateB".to_string(), "crateC".to_string()];
// Just verify that the function runs without error.
// We cannot check stdout easily here, but at least no panic should occur.
display_pca_results(crate_names, eigenvalues, eigenvectors);
}
// Ensure no panics on empty input
#[test]
fn test_prepare_data_matrix_empty_input() {
let crate_activity: HashMap<String, Vec<i64>> = HashMap::new();
let result = prepare_data_matrix(&crate_activity);
assert!(matches!(result, Err(PcaError::NoActivityDataAvailable)));
}
// Test varying lengths with deterministic ordering
#[test]
fn test_prepare_data_matrix_varying_lengths() {
let crate_activity = create_crate_activity_data(&[
("crateA", &[100, 200, 300]),
("crateB", &[50, 75]),
]);
// This test expects alphabetical order: crateA then crateB
// crateA row => [100, 200, 300]
// crateB row => [50, 75, 0] (padded)
if let Ok(matrix) = prepare_data_matrix(&crate_activity) {
assert_eq!(matrix.nrows(), 2);
assert_eq!(matrix.ncols(), 3);
// Check crateA row
assert_eq!(matrix[[0,0]], 100.0);
assert_eq!(matrix[[0,1]], 200.0);
assert_eq!(matrix[[0,2]], 300.0);
// Check crateB row
assert_eq!(matrix[[1,0]], 50.0);
assert_eq!(matrix[[1,1]], 75.0);
assert_eq!(matrix[[1,2]], 0.0);
} else {
panic!("Expected success for prepared data matrix with varying lengths.");
}
}
// Test uniform length data
#[test]
fn test_prepare_data_matrix_uniform_length() {
let crate_activity = create_crate_activity_data(&[
("crateA", &[1, 2, 3, 4]),
("crateB", &[10, 20, 30, 40]),
]);
// Alphabetical: crateA, crateB
// crateA => [1, 2, 3, 4]
// crateB => [10, 20, 30, 40]
if let Ok(matrix) = prepare_data_matrix(&crate_activity) {
assert_eq!(matrix.nrows(), 2);
assert_eq!(matrix.ncols(), 4);
// crateA row
assert_eq!(matrix[[0,0]], 1.0);
assert_eq!(matrix[[0,1]], 2.0);
assert_eq!(matrix[[0,2]], 3.0);
assert_eq!(matrix[[0,3]], 4.0);
// crateB row
assert_eq!(matrix[[1,0]], 10.0);
assert_eq!(matrix[[1,1]], 20.0);
assert_eq!(matrix[[1,2]], 30.0);
assert_eq!(matrix[[1,3]], 40.0);
} else {
panic!("Expected success for uniform length data.");
}
}
// Test a single crate scenario
#[test]
fn test_prepare_data_matrix_single_crate() {
let crate_activity = create_crate_activity_data(&[
("onlyCrate", &[42, 42, 42]),
]);
if let Ok(matrix) = prepare_data_matrix(&crate_activity) {
assert_eq!(matrix.nrows(), 1);
assert_eq!(matrix.ncols(), 3);
assert_eq!(matrix[[0,0]], 42.0);
assert_eq!(matrix[[0,1]], 42.0);
assert_eq!(matrix[[0,2]], 42.0);
} else {
panic!("Expected success with single crate input.");
}
}
// Test standardization with a basic matrix
#[test]
fn test_standardize_matrix_basic() {
let matrix = array![
[1.0, 2.0, 3.0],
[2.0, 2.0, 2.0],
[10.0, 11.0, 12.0]
];
if let Ok(std_matrix) = standardize_matrix(&matrix) {
// Check column means ~0 and std dev ~1 (where possible)
for col in 0..std_matrix.ncols() {
let column = std_matrix.column(col);
let mean = column.mean().unwrap_or(0.0);
let std_dev = column.std(0.0);
assert!((mean - 0.0).abs() < 1e-9);
if std_dev > 1e-9 {
assert!((std_dev - 1.0).abs() < 1e-9);
}
}
} else {
panic!("Expected successful standardization.");
}
}
// Test standardization on a constant column
#[test]
fn test_standardize_matrix_constant_column() {
let matrix = array![
[5.0, 1.0, 2.0],
[5.0, 2.0, 3.0],
[5.0, 3.0, 4.0]
];
if let Ok(std_matrix) = standardize_matrix(&matrix) {
let col0 = std_matrix.column(0);
for val in col0 {
assert!((val - 0.0).abs() < 1e-9, "Expected zero column for constant input.");
}
} else {
panic!("Expected success with a constant column.");
}
}
// Test standardization on negative and mixed values
#[test]
fn test_standardize_matrix_negative_values() {
let matrix = array![
[-10.0, 0.0, 10.0],
[-20.0, 0.0, 20.0],
[-30.0, 0.0, 30.0],
];
if let Ok(std_matrix) = standardize_matrix(&matrix) {
// The middle column is all zeros (constant), should become zero column
let col1 = std_matrix.column(1);
for val in col1 {
assert!((val - 0.0).abs() < 1e-9);
}
// The other columns have linear increasing/decreasing patterns.
// Just verify mean ~0 and std ~1.
for col_idx in [0,2].iter() {
let col = std_matrix.column(*col_idx);
let mean = col.mean().unwrap_or(0.0);
let std_dev = col.std(0.0);
assert!((mean - 0.0).abs() < 1e-9);
assert!((std_dev - 1.0).abs() < 1e-9);
}
} else {
panic!("Expected successful standardization with negative values.");
}
}
// Test covariance matrix computation
#[test]
fn test_compute_covariance_matrix() {
let matrix = array![
[1.0, 2.0, 3.0],
[2.0, 2.0, 2.0],
[10.0,11.0,12.0]
];
let cov = compute_covariance_matrix(&matrix);
// Check symmetry
for i in 0..cov.nrows() {
for j in 0..cov.ncols() {
let diff = (cov[[i,j]] - cov[[j,i]]).abs();
assert!(diff < 1e-12, "Covariance matrix not symmetric.");
}
}
}
// Test eigen decomposition on a simple diagonal matrix
#[test]
fn test_perform_eigen_decomposition() {
let matrix = array![
[2.0, 0.0],
[0.0, 1.0]
];
let (vals, vecs) = perform_eigen_decomposition(&matrix);
let mut sorted_vals = vals.clone();
sorted_vals.sort_by(|a,b| a.partial_cmp(b).unwrap());
assert!((sorted_vals[0]-1.0).abs()<1e-9);
assert!((sorted_vals[1]-2.0).abs()<1e-9);
assert_eq!(vecs.nrows(), 2);
assert_eq!(vecs.ncols(), 2);
}
// Test PCA with no data
#[test]
fn test_perform_pca_no_data() {
let crate_activity = HashMap::new();
let result = perform_pca(&crate_activity);
assert!(matches!(result, Err(PcaError::NoActivityDataAvailable)));
}
// Test basic PCA
#[test]
fn test_perform_pca_basic() {
let crate_activity = create_crate_activity_data(&[
("crateA", &[1,2,3]),
("crateB", &[2,4,6]),
]);
let result = perform_pca(&crate_activity);
if let Err(e) = result {
panic!("Expected PCA success, got: {:?}", e);
}
}
// Test PCA on identical crates
#[test]
fn test_pca_with_identical_crates() {
let crate_activity = create_crate_activity_data(&[
("crate1", &[10, 20, 30, 40]),
("crate2", &[10, 20, 30, 40]),
]);
let result = perform_pca(&crate_activity);
if let Err(e) = result {
panic!("Expected success with identical crates: {:?}", e);
}
}
// Test PCA with zero variance data
#[test]
fn test_pca_with_zero_variance_data() {
let crate_activity = create_crate_activity_data(&[
("constantCrate", &[100,100,100]),
("anotherConstantCrate", &[100,100,100]),
]);
let result = perform_pca(&crate_activity);
if let Err(e) = result {
panic!("Expected PCA success with zero variance data: {:?}", e);
}
}
// Test PCA on different lengths
#[test]
fn test_pca_different_lengths() {
let crate_activity = create_crate_activity_data(&[
("crateShort", &[1,2]),
("crateLong", &[5,10,15,20]),
]);
let result = perform_pca(&crate_activity);
if let Err(e) = result {
panic!("Expected PCA success with different lengths: {:?}", e);
}
}
// Test PCA on large random data
#[test]
fn test_pca_large_random_data() {
let mut large_data = HashMap::new();
let days = 500;
for i in 0..50 {
let crate_name = format!("crate{}", i);
let values: Vec<i64> = (0..days).map(|d| d as i64 * (i as i64 + 1)).collect();
large_data.insert(crate_name, values);
}
let result = perform_pca(&large_data);
if let Err(e) = result {
panic!("Expected PCA success with large data: {:?}", e);
}
}
// Additional edge case: random negative and positive, varying sizes
#[test]
fn test_pca_with_mixed_random_data() {
let crate_activity = create_crate_activity_data(&[
("alpha", &[-1, -2, -3, -4, -5]),
("beta", &[5, 4, 3, 2, 1]),
("gamma", &[0, 10, 20, 30, 40])
]);
let result = perform_pca(&crate_activity);
if let Err(e) = result {
panic!("Expected PCA success with mixed random data: {:?}", e);
}
}
// Test stable ordering: we rely on alphabetical sorting in prepare_data_matrix
#[test]
fn test_pca_stable_ordering() {
let crate_activity = create_crate_activity_data(&[
("zCrate", &[3,3,3]),
("aCrate", &[1,1,1]),
("mCrate", &[2,2,2]),
]);
// After sorting: aCrate, mCrate, zCrate
// Check ordering by analyzing the resulting matrix directly.
if let Ok(matrix) = prepare_data_matrix(&crate_activity) {
// aCrate row => [1,1,1]
// mCrate row => [2,2,2]
// zCrate row => [3,3,3]
assert_eq!(matrix[[0,0]], 1.0);
assert_eq!(matrix[[1,0]], 2.0);
assert_eq!(matrix[[2,0]], 3.0);
} else {
panic!("Expected success for stable ordering test.");
}
}
}
crate::ix!();
#[derive(Getters,Setters,Debug, Serialize,Deserialize)]
pub struct CrateResponse {
#[getset(get = "pub", set = "pub")] version_downloads: Vec<VersionDownload>,
}
crate::ix!();
/// Crate Activity Analyzer
#[derive(Getters,StructOpt, Debug)]
#[structopt(name = "act")]
pub struct CrateActivityCli {
#[structopt(long, short = "i", help = "Ignores crate activity cache, scrapes activity data again")]
#[getset(get = "pub")]
ignore_cache: bool,
/// Enable all analyses: correlations, PCA, hierarchical clustering, network analysis, etc.
#[structopt(long, help = "Enable all analyses at once")]
#[getset(get = "pub")]
all: bool,
/// Toggle to enable or disable correlation analysis
#[structopt(long, short = "c", help = "Display correlation analysis")]
#[getset(get = "pub")]
show_correlations: bool,
/// Toggle to enable or disable PCA analysis
#[structopt(long, short = "p", help = "Perform PCA analysis")]
#[getset(get = "pub")]
perform_pca: bool,
/// Toggle to enable hierarchical clustering
#[structopt(long, short = "h", help = "Perform hierarchical clustering")]
#[getset(get = "pub")]
perform_hierarchical_clustering: bool,
/// Toggle to enable correlation network analysis
#[structopt(long, short = "n", help = "Perform correlation network analysis")]
#[getset(get = "pub")]
correlation_network: bool,
/// Threshold for including edges in the correlation network graph
#[structopt(long, default_value = "0.7", help = "Correlation threshold for network edges")]
#[getset(get = "pub")]
network_threshold: f64,
/// Use Girvan–Newman algorithm to find a specified number of communities
#[structopt(long, short = "g", help = "Target number of communities for Girvan–Newman")]
#[getset(get = "pub")]
girvan_newman: Option<usize>,
/// Compute betweenness centrality for nodes (and edges) and display top nodes
#[structopt(long, short = "b", help = "Compute betweenness centrality and display top nodes")]
#[getset(get = "pub")]
compute_betweenness: bool,
/// Print a summary of the network graph
#[structopt(long, short = "s", help = "Print a summary of the network graph")]
#[getset(get = "pub")]
print_summary: bool,
/// Toggle to enable time-lagged correlation analysis
#[structopt(long, short = "t", help = "Compute time-lagged correlations")]
#[getset(get = "pub")]
time_lag_correlations: bool,
/// Maximum lag in days for time-lagged correlations
#[structopt(long, default_value = "7", help = "Maximum lag (in days) to consider for time-lag correlations")]
#[getset(get = "pub")]
max_lag: i32,
/// Z-score threshold for outlier detection (MAD-based)
#[structopt(long, default_value = "24.0", help = "Z-score threshold for outlier detection")]
#[getset(get = "pub")]
outlier_z_threshold: f64,
/// Downweight outliers instead of removing them
#[structopt(long, help = "Downweight outliers instead of removing them")]
#[getset(get = "pub")]
downweight_outliers: bool,
/// Factor by which to downweight outliers if --downweight-outliers is used
#[structopt(long, default_value = "0.1", help = "Downweight factor for outliers")]
#[getset(get = "pub")]
outlier_weight: f64,
/// Disable outlier handling altogether
#[structopt(long, help = "Disable outlier detection and handling")]
disable_outlier_handling: bool,
#[structopt(long, help = "If true, we will print each individual crate per group")]
#[getset(get="pub")]
expand_groups: bool,
#[structopt(long, default_value = "2", help = "Minimum group size required to treat them as a group")]
#[getset(get="pub")]
min_group_size: usize,
}
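// A typical invocation might look like (hypothetical shell session):
//
//   act --show-correlations --perform-pca --network-threshold 0.8 --min-group-size 3
//
// or simply `act --all` to enable every analysis at once.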
impl CrateActivityCli {
pub fn read_command_line() -> Self {
let mut cli = CrateActivityCli::from_args();
cli.apply_all_flag();
cli
}
pub fn disable_outlier_handling(&self) -> bool {
// Under cfg(test) we always disable outlier handling so tests see raw data.
#[cfg(test)]
let disable_outliers_override = true;
#[cfg(not(test))]
let disable_outliers_override = false;
self.disable_outlier_handling || disable_outliers_override
}
/// Apply the `--all` flag overrides if set.
pub fn apply_all_flag(&mut self) {
if self.all {
self.show_correlations = true;
self.perform_pca = true;
self.perform_hierarchical_clustering = true;
self.correlation_network = true;
self.compute_betweenness = true;
self.print_summary = true;
self.time_lag_correlations = true;
// You might leave outlier handling as is or also enable/disable it if desired.
}
}
}
crate::ix!();
#[derive(Builder,Getters,Setters,Clone,Debug,Serialize,Deserialize)]
#[builder(setter(into))]
pub struct VersionDownload {
#[getset(get = "pub", set = "pub")] version: i64,
#[getset(get = "pub", set = "pub")] downloads: i64,
#[getset(get = "pub", set = "pub")] date: NaiveDate,
}
crate::ix!();
#[derive(Builder,Debug)]
#[builder(setter(into))]
pub struct CrateActivitySummary {
date_interval_1d: NaiveDate,
date_interval_3d: NaiveDate,
date_interval_full_start: NaiveDate,
date_interval_full_end: NaiveDate,
total_downloads: i64,
avg_daily_downloads: f64,
avg_daily_downloads_per_crate: f64,
median_daily_downloads: i64,
crates_analyzed: usize,
top_crates_1d: Vec<(String, i64)>,
top_crates_3d: Vec<(String, i64)>,
top_crates_7d: Vec<(String, i64)>,
/// If true, we'll print each individual crate in a group
expand_groups: bool,
/// Minimum group size required to treat them as a “group”
min_group_size: usize,
}
impl CrateActivitySummary {
pub fn new(
summaries: &[CrateUsageSummary],
interval_downloads_1d: HashMap<String, i64>,
interval_downloads_3d: HashMap<String, i64>,
interval_downloads_7d: HashMap<String, i64>,
one_day_ago: NaiveDate,
three_days_ago: NaiveDate,
seven_days_ago: NaiveDate,
expand_groups: bool,
min_group_size: usize,
) -> Self {
// Compute the full date range
let (full_start, full_end) = summaries
.iter()
.flat_map(|s| s.version_downloads())
.map(|d| d.date())
.minmax()
.into_option()
.unwrap_or((&one_day_ago, &one_day_ago));
// Overall stats
let total_downloads: i64 = summaries.iter().map(|s| s.total_downloads()).sum();
let avg_daily_downloads: f64 =
summaries.iter().map(|s| s.average_daily_downloads()).sum::<f64>();
let avg_daily_downloads_per_crate = if summaries.is_empty() {
0.0
} else {
avg_daily_downloads / summaries.len() as f64
};
// Median of per-crate total downloads (reported as "Median Daily Downloads")
let mut per_crate_totals: Vec<i64> =
summaries.iter().map(|s| *s.total_downloads()).collect();
per_crate_totals.sort();
let median_daily_downloads = if per_crate_totals.is_empty() {
0
} else if per_crate_totals.len() % 2 == 0 {
let mid = per_crate_totals.len() / 2;
(per_crate_totals[mid - 1] + per_crate_totals[mid]) / 2
} else {
per_crate_totals[per_crate_totals.len() / 2]
};
// Convert the HashMaps into sorted vecs
let mut top_crates_1d: Vec<_> = interval_downloads_1d.into_iter().collect();
let mut top_crates_3d: Vec<_> = interval_downloads_3d.into_iter().collect();
let mut top_crates_7d: Vec<_> = interval_downloads_7d.into_iter().collect();
top_crates_1d.sort_by_key(|&(_, downloads)| std::cmp::Reverse(downloads));
top_crates_3d.sort_by_key(|&(_, downloads)| std::cmp::Reverse(downloads));
top_crates_7d.sort_by_key(|&(_, downloads)| std::cmp::Reverse(downloads));
CrateActivitySummary {
date_interval_1d: one_day_ago,
date_interval_3d: three_days_ago,
date_interval_full_end: *full_end,
date_interval_full_start: *full_start,
total_downloads,
avg_daily_downloads,
avg_daily_downloads_per_crate,
median_daily_downloads,
crates_analyzed: summaries.len(),
top_crates_1d,
top_crates_3d,
top_crates_7d,
expand_groups,
min_group_size,
}
}
}
impl fmt::Display for CrateActivitySummary {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use std::collections::HashMap;
use std::fmt::Write as _;
// Helper: extract the group prefix from a crate name
// e.g. "surgefx-allpass" => "surgefx", "workspacer-3p" => "workspacer"
fn extract_prefix(crate_name: &str) -> String {
if let Some(idx) = crate_name.find('-') {
crate_name[..idx].to_string()
} else {
crate_name.to_string()
}
}
// We'll store stats in a struct for convenience
#[derive(Clone)]
struct GroupStats {
group_label: String,
max_downloads: i64,
sum_downloads: i64,
avg_downloads: f64,
n_crates: usize,
members: Vec<(String, i64)>,
}
#[tracing::instrument(level = "debug", skip(crates))]
fn group_crates_compact(
crates: &[(String, i64)],
min_group_size: usize,
) -> (Vec<GroupStats>, Vec<(String, i64)>) {
let mut group_map: HashMap<String, Vec<(String, i64)>> = HashMap::new();
// Collect members by prefix
for (crate_name, downloads) in crates {
let prefix = extract_prefix(crate_name);
group_map
.entry(prefix)
.or_default()
.push((crate_name.clone(), *downloads));
}
let mut groups = Vec::new();
let mut single_items = Vec::new();
for (prefix, members) in group_map {
if members.len() >= min_group_size {
// We form a group
let sum_downloads: i64 = members.iter().map(|m| m.1).sum();
let max_downloads: i64 = members.iter().map(|m| m.1).max().unwrap_or(0);
let n_crates = members.len();
let avg_downloads = if n_crates > 0 {
sum_downloads as f64 / n_crates as f64
} else {
0.0
};
// e.g. "surgefx-" or "workspacer-"
let group_label = format!("{}-*", prefix);
// Sort the group's members by descending downloads
let mut sorted_members = members.clone();
sorted_members.sort_by(|a, b| {
b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0))
});
groups.push(GroupStats {
group_label,
max_downloads,
sum_downloads,
avg_downloads,
n_crates,
members: sorted_members,
});
} else {
// If group < min_group_size, treat each crate individually
for (crate_name, downloads) in members {
single_items.push((crate_name, downloads));
}
}
}
// Sort groups by descending max_downloads, then by descending sum, then alpha
groups.sort_by(|a, b| {
b.max_downloads
.cmp(&a.max_downloads)
.then_with(|| b.sum_downloads.cmp(&a.sum_downloads))
.then_with(|| a.group_label.cmp(&b.group_label))
});
// Then sort single items (descending)
single_items.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
(groups, single_items)
}
#[tracing::instrument(level = "debug", skip(f, crates))]
fn display_grouped_crates_compact(
f: &mut fmt::Formatter<'_>,
heading: &str,
crates: &[(String, i64)],
min_group_size: usize,
expand_groups: bool,
) -> fmt::Result {
writeln!(f, "\n{}", heading)?;
let (groups, single_items) = group_crates_compact(crates, min_group_size);
// The headline total sums each group's max_downloads plus every single
// item (rather than whole-group sums), so a prefix family is not
// double-counted.
let mut total_for_display = 0i64;
for g in &groups {
total_for_display += g.max_downloads;
}
let single_total: i64 = single_items.iter().map(|x| x.1).sum();
total_for_display += single_total;
let group_coverage: usize = groups.iter().map(|g| g.n_crates).sum();
let overall_count = group_coverage + single_items.len();
writeln!(
f,
" {} distinct prefix group(s) covering {} crates, total {} downloads (by max+singles)",
groups.len(),
overall_count,
total_for_display
)?;
// Display each group
for g in &groups {
// example line:
// "workspacer-* max=87 avg=27.76 sum=1721 n_crates=xx"
writeln!(
f,
" {:<24} max={:<5} avg={:>6.2} sum={:<6} n_crates={}",
g.group_label,
g.max_downloads,
g.avg_downloads,
g.sum_downloads,
g.n_crates
)?;
// If expand_groups is true, show each member
if expand_groups {
for (crate_name, dl) in &g.members {
writeln!(f, " {:<24} {:>5} downloads", crate_name, dl)?;
}
}
}
// Finally, display single items (which didn't meet min_group_size)
for (crate_name, downloads) in &single_items {
writeln!(f, " {:<24} {} downloads", crate_name, downloads)?;
}
Ok(())
}
// 1) Print the main summary lines
writeln!(f, "Crate Activity Summary:")?;
writeln!(f, " Full Data Range: {} to {}",
self.date_interval_full_start, self.date_interval_full_end)?;
writeln!(f, " Date Interval (Last 1 Day): {}", self.date_interval_1d)?;
writeln!(f, " Date Interval (Last 3 Days): {}", self.date_interval_3d)?;
writeln!(f, " Total Downloads: {}", self.total_downloads)?;
writeln!(f, " Average Daily Downloads: {:.2}", self.avg_daily_downloads)?;
writeln!(f, " Average Daily Downloads per Crate: {:.2}", self.avg_daily_downloads_per_crate)?;
writeln!(f, " Median Daily Downloads: {}", self.median_daily_downloads)?;
writeln!(f, " Crates Analyzed: {}", self.crates_analyzed)?;
// 2) Group + display for each interval
display_grouped_crates_compact(
f,
"Top Crates (Last 1 Day):",
&self.top_crates_1d,
self.min_group_size,
self.expand_groups,
)?;
display_grouped_crates_compact(
f,
"Top Crates (Last 3 Days):",
&self.top_crates_3d,
self.min_group_size,
self.expand_groups,
)?;
display_grouped_crates_compact(
f,
"Top Crates (Last 7 Days):",
&self.top_crates_7d,
self.min_group_size,
self.expand_groups,
)?;
Ok(())
}
}