
datui_lib/widgets/analysis.rs

use ratatui::{
    buffer::Buffer,
    layout::{Constraint, Direction, Layout, Rect},
    style::{Color, Modifier, Style},
    symbols,
    text::{Line, Span},
    widgets::{
        Axis, Bar, BarChart, BarGroup, Block, BorderType, Borders, Cell, Chart, Dataset, GraphType,
        List, ListItem, Paragraph, Row, StatefulWidget, Table, TableState, Widget,
    },
};

use crate::analysis_modal::{AnalysisFocus, AnalysisTool, AnalysisView, HistogramScale};
use crate::config::Theme;
use crate::statistics::{
    beta_pdf, chi_squared_pdf, gamma_pdf, gamma_quantile, geometric_pmf, geometric_quantile,
    students_t_pdf, weibull_pdf, AnalysisContext, AnalysisResults, DistributionAnalysis,
    DistributionType,
};
use crate::widgets::datatable::DataTableState;

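/// Construction parameters for [`AnalysisWidget`]: borrowed table state, analysis
/// results and context, view/selection state, and theming options.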
pub struct AnalysisWidgetConfig<'a> {
    pub state: &'a DataTableState,
    pub results: Option<&'a AnalysisResults>,
    pub context: &'a AnalysisContext,
    pub view: AnalysisView,
    pub selected_tool: Option<AnalysisTool>,
    pub column_offset: usize,
    pub selected_correlation: Option<(usize, usize)>,
    pub focus: AnalysisFocus,
    pub selected_theoretical_distribution: DistributionType,
    pub histogram_scale: HistogramScale,
    pub theme: &'a Theme,
    pub table_cell_padding: u16,
}

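/// Full-screen analysis widget. Renders the Describe, Distribution Analysis, and
/// Correlation Matrix views and holds mutable table/selector state for navigation.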
pub struct AnalysisWidget<'a> {
    _state: &'a DataTableState,
    results: Option<&'a AnalysisResults>,
    _context: &'a AnalysisContext,
    view: AnalysisView,
    selected_tool: Option<AnalysisTool>,
    table_state: &'a mut TableState,
    distribution_table_state: &'a mut TableState,
    correlation_table_state: &'a mut TableState,
    sidebar_state: &'a mut TableState,
    column_offset: usize,
    selected_correlation: Option<(usize, usize)>,
    focus: AnalysisFocus,
    selected_theoretical_distribution: DistributionType,
    distribution_selector_state: &'a mut TableState,
    histogram_scale: HistogramScale,
    theme: &'a Theme,
    table_cell_padding: u16,
}

impl<'a> AnalysisWidget<'a> {
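    /// Builds the widget from a config plus mutable selection state for each table.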
    pub fn new(
        config: AnalysisWidgetConfig<'a>,
        table_state: &'a mut TableState,
        distribution_table_state: &'a mut TableState,
        correlation_table_state: &'a mut TableState,
        sidebar_state: &'a mut TableState,
        distribution_selector_state: &'a mut TableState,
    ) -> Self {
        Self {
            _state: config.state,
            results: config.results,
            _context: config.context,
            view: config.view,
            selected_tool: config.selected_tool,
            table_state,
            distribution_table_state,
            correlation_table_state,
            sidebar_state,
            column_offset: config.column_offset,
            selected_correlation: config.selected_correlation,
            focus: config.focus,
            selected_theoretical_distribution: config.selected_theoretical_distribution,
            distribution_selector_state,
            histogram_scale: config.histogram_scale,
            theme: config.theme,
            table_cell_padding: config.table_cell_padding,
        }
    }
}

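// Dispatch rendering to the view-specific methods below.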
impl<'a> Widget for AnalysisWidget<'a> {
    fn render(self, area: Rect, buf: &mut Buffer) {
        match self.view {
            AnalysisView::Main => self.render_main_view(area, buf),
            AnalysisView::DistributionDetail => self.render_distribution_detail(area, buf),
            AnalysisView::CorrelationDetail => self.render_correlation_detail(area, buf),
        }
    }
}

impl<'a> AnalysisWidget<'a> {
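    /// Main view: breadcrumb on top, tool output (or instructions) on the left,
    /// and the tool-selection sidebar on the right.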
    fn render_main_view(self, area: Rect, buf: &mut Buffer) {
        // Sidebar width (~30 characters)
        let sidebar_width = 32u16;

        // Full-screen layout: breadcrumb, main area (no separate keybind hints line)
        let layout = Layout::default()
            .direction(Direction::Vertical)
            .constraints([
                Constraint::Length(1), // Breadcrumb
                Constraint::Fill(1),   // Main area + sidebar
            ])
            .split(area);

        // Breadcrumb: tool name when a tool is selected, or "Analysis" when none selected
        let tool_name = match self.selected_tool {
            Some(AnalysisTool::Describe) => "Describe",
            Some(AnalysisTool::DistributionAnalysis) => "Distribution Analysis",
            Some(AnalysisTool::CorrelationMatrix) => "Correlation Matrix",
            None => "Analysis",
        };

        let breadcrumb_text = if let Some(results) = self.results {
            if results.sample_size.is_some() {
                format!("{} (sampled)", tool_name)
            } else {
                tool_name.to_string()
            }
        } else {
            tool_name.to_string()
        };

        let header_row_style = header_style(self.theme, "controls_bg", "table_header");
        Paragraph::new(breadcrumb_text)
            .style(header_row_style)
            .render(layout[0], buf);

        // Split main area into content area and sidebar
        let main_layout = Layout::default()
            .direction(Direction::Horizontal)
            .constraints([
                Constraint::Fill(1),               // Main content area
                Constraint::Length(sidebar_width), // Sidebar
            ])
            .split(layout[1]);

        // Main content area: instructions when no tool selected, else selected tool (or "Computing...")
        match self.selected_tool {
            None => {
                const INSTRUCTION_LINES: u16 = 1;
                let inner = Layout::default()
                    .direction(Direction::Vertical)
                    .constraints([
                        Constraint::Min(0),
                        Constraint::Length(INSTRUCTION_LINES),
                        Constraint::Min(0),
                    ])
                    .split(main_layout[0]);
                Paragraph::new("Select an analysis tool from the sidebar.")
                    .centered()
                    .style(Style::default().fg(self.theme.get("text_primary")))
                    .render(inner[1], buf);
            }
            Some(tool) => {
                if let Some(results) = self.results {
                    match tool {
                        AnalysisTool::Describe => {
                            render_statistics_table(
                                results,
                                self.table_state,
                                self.column_offset,
                                main_layout[0],
                                buf,
                                self.theme,
                                self.table_cell_padding,
                            );
                        }
                        AnalysisTool::DistributionAnalysis => {
                            render_distribution_table(
                                results,
                                self.distribution_table_state,
                                self.column_offset,
                                main_layout[0],
                                buf,
                                self.theme,
                            );
                        }
                        AnalysisTool::CorrelationMatrix => {
                            render_correlation_matrix(
                                results,
                                self.correlation_table_state,
                                &self.selected_correlation,
                                self.column_offset,
                                main_layout[0],
                                buf,
                                self.theme,
                            );
                        }
                    }
                } else {
                    Paragraph::new("Computing statistics...")
                        .centered()
                        .render(main_layout[0], buf);
                }
            }
        }

        // Sidebar: Tool list
        render_sidebar(
            main_layout[1],
            buf,
            self.sidebar_state,
            self.selected_tool,
            self.focus,
            self.theme,
        );

        // Keybind hints are now shown on the main bottom bar (see lib.rs)
    }

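    /// Detail view for a single column's distribution: condensed statistics, Q-Q
    /// plot, histogram vs. theoretical curve, distribution selector, and settings.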
    fn render_distribution_detail(self, area: Rect, buf: &mut Buffer) {
        // Get selected distribution
        let selected_idx = self.distribution_table_state.selected();
        let dist_analysis: Option<&DistributionAnalysis> = self.results.and_then(|results| {
            selected_idx.and_then(|idx| results.distribution_analyses.get(idx))
        });

        if let Some(dist) = dist_analysis {
            // Layout: breadcrumb, main content (no keybind hints line)
            let layout = Layout::default()
                .direction(Direction::Vertical)
                .constraints([
                    Constraint::Length(1), // Breadcrumb
                    Constraint::Fill(1),   // Main content
                ])
                .split(area);

            // Breadcrumb with column name and Escape hint on top right
            // Split breadcrumb area into left (title) and right (Escape hint)
            let breadcrumb_layout = Layout::default()
                .direction(Direction::Horizontal)
                .constraints([
                    Constraint::Fill(1),   // Title on left
                    Constraint::Length(8), // Escape hint on right ("Esc Back" = 8 chars)
                ])
                .split(layout[0]);

            let title_text = format!("Distribution Analysis: {}", dist.column_name);
            let header_row_style = header_style(self.theme, "controls_bg", "table_header");
            Paragraph::new(title_text)
                .style(header_row_style)
                .render(breadcrumb_layout[0], buf);

            Paragraph::new("Esc Back")
                .style(header_row_style)
                .right_aligned()
                .render(breadcrumb_layout[1], buf);

            // Main content area - optimized layout
            // Split into: condensed stats header, charts and selector area
            let main_layout = Layout::default()
                .direction(Direction::Vertical)
                .constraints([
                    Constraint::Length(1), // Condensed stats header (single line)
                    Constraint::Fill(1),   // Charts and selector
                ])
                .split(layout[1]);

            // Condensed header: Key statistics in one or two lines
            // Use selected theoretical distribution type (dynamic)
            render_condensed_statistics(
                dist,
                self.selected_theoretical_distribution,
                main_layout[0],
                buf,
                self.theme,
            );

            // Split charts and selector horizontally
            let content_layout = Layout::default()
                .direction(Direction::Horizontal)
                .constraints([
                    Constraint::Percentage(75), // Q-Q plot and histogram
                    Constraint::Percentage(25), // Distribution selector and settings
                ])
                .split(main_layout[1]);

            // Right side: Split into distribution selector and settings
            let right_layout = Layout::default()
                .direction(Direction::Vertical)
                .constraints([
                    Constraint::Fill(1),   // Distribution selector (takes remaining space)
                    Constraint::Length(4), // Settings box (4 lines: border + 2 content + border)
                ])
                .split(content_layout[1]);

            // Left side: Q-Q plot and histogram with spacing
            let charts_layout = Layout::default()
                .direction(Direction::Vertical)
                .constraints([
                    Constraint::Percentage(52), // Q-Q plot (slightly reduced to make room for spacing)
                    Constraint::Length(1),      // Vertical spacing between charts
                    Constraint::Percentage(47), // Histogram (slightly reduced to make room for spacing)
                ])
                .split(content_layout[0]);

            // Add padding around chart areas for better visual separation
            let chart_padding = 1u16; // 1 character padding on all sides
            let right_padding_extra = 1u16; // Extra padding on right side to separate from distribution box
            let top_padding_extra = 1u16; // Extra padding at top to separate title from chart
            let qq_plot_area = Rect::new(
                charts_layout[0].left() + chart_padding,
                charts_layout[0].top() + chart_padding + top_padding_extra, // Extra top padding
                charts_layout[0]
                    .width
                    .saturating_sub(chart_padding) // Left padding
                    .saturating_sub(right_padding_extra), // Extra right padding
                charts_layout[0]
                    .height
                    .saturating_sub(chart_padding * 2)
                    .saturating_sub(top_padding_extra), // Account for extra top padding
            );
            let histogram_area = Rect::new(
                charts_layout[2].left() + chart_padding,
                charts_layout[2].top() + chart_padding + top_padding_extra, // Extra top padding
                charts_layout[2]
                    .width
                    .saturating_sub(chart_padding) // Left padding
                    .saturating_sub(right_padding_extra), // Extra right padding
                charts_layout[2]
                    .height
                    .saturating_sub(chart_padding * 2)
                    .saturating_sub(top_padding_extra), // Account for extra top padding
            );

            // Calculate maximum label width for both charts to ensure alignment
            // This needs to account for both Q-Q plot labels (data values) and histogram labels (counts)
            let sorted_data = &dist.sorted_sample_values;
            let max_label_width = if sorted_data.is_empty() {
                1
            } else {
                let data_min = sorted_data[0];
                let data_max = sorted_data[sorted_data.len() - 1];

                // Q-Q plot labels: data_min, (data_min+data_max)/2, data_max formatted as {:.1}
                let qq_label_bottom = format!("{:.1}", data_min);
                let qq_label_mid = format!("{:.1}", (data_min + data_max) / 2.0);
                let qq_label_top = format!("{:.1}", data_max);
                let qq_max_width = qq_label_bottom
                    .chars()
                    .count()
                    .max(qq_label_mid.chars().count())
                    .max(qq_label_top.chars().count());

                // Histogram labels: 0, global_max/2, global_max (formatted as integers)
                // We need to estimate global_max - it's roughly the max of data bin counts and theory bin counts
                // For estimation, use the data size as a proxy for maximum counts
                let estimated_global_max = sorted_data.len();
                let hist_label_0 = format!("{}", 0);
                let hist_label_mid = format!("{}", estimated_global_max / 2);
                let hist_label_max = format!("{}", estimated_global_max);
                let hist_max_width = hist_label_0
                    .chars()
                    .count()
                    .max(hist_label_mid.chars().count())
                    .max(hist_label_max.chars().count());
                // Use the larger of the two; padding is added below.
                qq_max_width.max(hist_max_width)
            };

            let shared_y_axis_label_width = (max_label_width as u16).max(1) + 1; // Max label width + 1 char padding

            // Calculate a unified X-axis range so the Q-Q plot and histogram share the
            // same scale. Use ONLY the actual data range (no padding, no theoretical
            // extensions) so the log scale works correctly and both charts stay in sync.
            let unified_x_range = if !sorted_data.is_empty() {
                let data_min = sorted_data[0];
                let data_max = sorted_data[sorted_data.len() - 1];
                (data_min, data_max)
            } else {
                (0.0, 1.0) // Fallback for empty data
            };

            // Q-Q plot approximation (larger, better aspect ratio)
            // Use selected theoretical distribution from selector
            render_qq_plot(
                dist,
                self.selected_theoretical_distribution,
                qq_plot_area,
                buf,
                shared_y_axis_label_width,
                self.theme,
                Some(unified_x_range),
            );

            // Histogram comparison (vertical bars), using the selected theoretical
            // distribution from the selector.
            // Log scale is only valid when every raw data value is strictly positive.
            let can_use_log_scale = !sorted_data.is_empty() && sorted_data.iter().all(|&v| v > 0.0);
            let log_scale_requested_but_unavailable =
                matches!(self.histogram_scale, HistogramScale::Log) && !can_use_log_scale;

            let histogram_config = HistogramRenderConfig {
                dist,
                dist_type: self.selected_theoretical_distribution,
                area: histogram_area,
                shared_y_axis_label_width,
                theme: self.theme,
                unified_x_range: Some(unified_x_range),
                histogram_scale: self.histogram_scale,
            };
            render_distribution_histogram(histogram_config, buf);

            // Right side: Distribution selector
            render_distribution_selector(
                dist,
                self.selected_theoretical_distribution,
                self.distribution_selector_state,
                self.focus,
                right_layout[0],
                buf,
                self.theme,
            );

            // Settings box below distribution selector
            render_distribution_settings(
                self.histogram_scale,
                log_scale_requested_but_unavailable,
                right_layout[1],
                buf,
                self.theme,
            );
        } else {
            Paragraph::new("No distribution selected")
                .centered()
                .render(area, buf);
        }
    }

    fn render_correlation_detail(self, _area: Rect, _buf: &mut Buffer) {
        // TODO: Implement correlation pair detail view
        // This will show relationship summary, scatter plot, and key statistics
    }
}

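/// Renders the Describe table: one row per column, with a locked "Column" name
/// column and horizontally scrollable statistic columns sized to fit their content.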
fn render_statistics_table(
    results: &AnalysisResults,
    table_state: &mut TableState,
    column_offset: usize,
    area: Rect,
    buf: &mut Buffer,
    theme: &Theme,
    table_cell_padding: u16,
) {
    let num_columns = results.column_statistics.len();
    if num_columns == 0 {
        Paragraph::new("No columns to display")
            .centered()
            .render(area, buf);
        return;
    }

    // Statistics to display (in order) - internal names for matching data
    let stat_names = vec![
        "count",
        "null_count",
        "mean",
        "std",
        "min",
        "25%",
        "50%",
        "75%",
        "max",
    ];
    // Display names in Title case for headers
    let stat_display_names = vec![
        "Count", "Nulls", "Mean", "Std", "Min", "25%", "50%", "75%", "Max",
    ];
    let num_stats = stat_names.len();

    // Calculate column widths based on header names and content (minimal spacing)
    // First, determine minimum width for each column based on header length
    // Note: ratatui Table adds 1 space between columns by default, so we don't add extra padding
    let mut min_col_widths: Vec<u16> = stat_display_names
        .iter()
        .map(|name| name.chars().count() as u16) // header length (no extra padding - table handles spacing)
        .collect();

    // Scan all data to find maximum width needed for each column
    for col_stat in &results.column_statistics {
        for (stat_idx, stat_name) in stat_names.iter().enumerate() {
            let value_str = match *stat_name {
                "count" => col_stat.count.to_string(),
                "null_count" => col_stat.null_count.to_string(),
                "mean" => col_stat
                    .numeric_stats
                    .as_ref()
                    .map(|n| format_num(n.mean))
                    .unwrap_or_else(|| "-".to_string()),
                "std" => col_stat
                    .numeric_stats
                    .as_ref()
                    .map(|n| format_num(n.std))
                    .unwrap_or_else(|| "-".to_string()),
                "min" => {
                    if let Some(ref num_stats) = col_stat.numeric_stats {
                        format_num(num_stats.min)
                    } else if let Some(ref cat_stats) = col_stat.categorical_stats {
                        cat_stats.min.clone().unwrap_or_else(|| "-".to_string())
                    } else {
                        "-".to_string()
                    }
                }
                "25%" => col_stat
                    .numeric_stats
                    .as_ref()
                    .map(|n| format_num(n.q25))
                    .unwrap_or_else(|| "-".to_string()),
                "50%" => col_stat
                    .numeric_stats
                    .as_ref()
                    .map(|n| format_num(n.median))
                    .unwrap_or_else(|| "-".to_string()),
                "75%" => col_stat
                    .numeric_stats
                    .as_ref()
                    .map(|n| format_num(n.q75))
                    .unwrap_or_else(|| "-".to_string()),
                "max" => {
                    if let Some(ref num_stats) = col_stat.numeric_stats {
                        format_num(num_stats.max)
                    } else if let Some(ref cat_stats) = col_stat.categorical_stats {
                        cat_stats.max.clone().unwrap_or_else(|| "-".to_string())
                    } else {
                        "-".to_string()
                    }
                }
                _ => "-".to_string(),
            };
            let value_len = value_str.chars().count() as u16;
            // Ensure width is at least the header length (already initialized) AND value length
            // This preserves header widths even if all data values are shorter
            let header_len = stat_display_names[stat_idx].chars().count() as u16;
            min_col_widths[stat_idx] = min_col_widths[stat_idx].max(value_len).max(header_len);
            // must fit both header and content (no padding - table handles spacing)
        }
    }

    // Locked column width (column name) - calculate from header text AND actual column names
    let header_text = "Column";
    let header_len = header_text.chars().count() as u16;
    let max_col_name_len = results
        .column_statistics
        .iter()
        .map(|cs| cs.name.chars().count() as u16)
        .max()
        .unwrap_or(header_len);
    let locked_col_width = max_col_name_len.max(header_len).max(10); // min 10, must fit both header and data (no padding - table handles spacing)

    // Calculate which columns can fit using same cell padding as main datatable
    let column_spacing = table_cell_padding;

    // Available width for stat columns = total width - locked column - spacing between locked and first stat
    let available_width = area
        .width
        .saturating_sub(locked_col_width)
        .saturating_sub(column_spacing);

    let mut used_width_from_zero = 0u16;
    let mut max_visible_from_zero = 0;

    for width in min_col_widths.iter() {
        let spacing_needed = if max_visible_from_zero > 0 {
            column_spacing
        } else {
            0
        };
        let total_needed = spacing_needed + width;

        if used_width_from_zero + total_needed <= available_width {
            used_width_from_zero += total_needed;
            max_visible_from_zero += 1;
        } else {
            break;
        }
    }

    max_visible_from_zero = max_visible_from_zero.max(1);

    let effective_offset = if max_visible_from_zero >= num_stats {
        0
    } else {
        column_offset.min(num_stats.saturating_sub(1))
    };

    let start_stat = effective_offset;

    let mut used_width = 0u16;
    let mut max_visible_stats = 0;

    for width in min_col_widths
        .iter()
        .skip(start_stat)
        .take(num_stats - start_stat)
    {
        let spacing_needed = if max_visible_stats > 0 {
            column_spacing
        } else {
            0
        };
        let total_needed = spacing_needed + width;

        if used_width + total_needed <= available_width {
            used_width += total_needed;
            max_visible_stats += 1;
        } else {
            break;
        }
    }

    max_visible_stats = max_visible_stats.max(1); // At least show 1 column

    let end_stat = (start_stat + max_visible_stats).min(num_stats);
    let visible_stats: Vec<usize> = (start_stat..end_stat).collect();

    if visible_stats.is_empty() {
        return;
    }

    let mut rows = Vec::new();

    let mut header_cells = vec![Cell::from("Column").style(Style::default())];
    for &stat_idx in &visible_stats {
        header_cells.push(Cell::from(stat_display_names[stat_idx]).style(Style::default()));
    }
    let header_row_style = header_style(theme, "controls_bg", "table_header");
    let header_row = Row::new(header_cells.clone()).style(header_row_style);

    for col_stat in &results.column_statistics {
        let mut cells = vec![Cell::from(col_stat.name.as_str())
            .style(Style::default().fg(theme.get("text_primary")))];
        for &stat_idx in &visible_stats {
            let stat_name = stat_names[stat_idx];
            let value = match stat_name {
                "count" => col_stat.count.to_string(),
                "null_count" => col_stat.null_count.to_string(),
                "mean" => col_stat
                    .numeric_stats
                    .as_ref()
                    .map(|n| format_num(n.mean))
                    .unwrap_or_else(|| "-".to_string()),
                "std" => col_stat
                    .numeric_stats
                    .as_ref()
                    .map(|n| format_num(n.std))
                    .unwrap_or_else(|| "-".to_string()),
                "min" => {
                    if let Some(ref num_stats) = col_stat.numeric_stats {
                        format_num(num_stats.min)
                    } else if let Some(ref cat_stats) = col_stat.categorical_stats {
                        cat_stats.min.clone().unwrap_or_else(|| "-".to_string())
                    } else {
                        "-".to_string()
                    }
                }
                "25%" => col_stat
                    .numeric_stats
                    .as_ref()
                    .map(|n| format_num(n.q25))
                    .unwrap_or_else(|| "-".to_string()),
                "50%" => col_stat
                    .numeric_stats
                    .as_ref()
                    .map(|n| format_num(n.median))
                    .unwrap_or_else(|| "-".to_string()),
                "75%" => col_stat
                    .numeric_stats
                    .as_ref()
                    .map(|n| format_num(n.q75))
                    .unwrap_or_else(|| "-".to_string()),
                "max" => {
                    if let Some(ref num_stats) = col_stat.numeric_stats {
                        format_num(num_stats.max)
                    } else if let Some(ref cat_stats) = col_stat.categorical_stats {
                        cat_stats.max.clone().unwrap_or_else(|| "-".to_string())
                    } else {
                        "-".to_string()
                    }
                }
                _ => "-".to_string(),
            };

            cells.push(Cell::from(value));
        }

        rows.push(Row::new(cells));
    }

    let mut constraints = vec![Constraint::Length(locked_col_width)];
    for &stat_idx in &visible_stats {
        // Use minimum width needed (ratatui will add spacing between columns)
        constraints.push(Constraint::Length(min_col_widths[stat_idx]));
    }

    let table = Table::new(rows, constraints)
        .header(header_row)
        .column_spacing(table_cell_padding)
        .row_highlight_style(Style::default().add_modifier(Modifier::REVERSED));

    // Use StatefulWidget for row selection
    StatefulWidget::render(table, area, buf, table_state);
}

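/// Formats a statistic for display: "-" for NaN, scientific notation for very
/// large or very small magnitudes, otherwise two decimal places.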
fn format_num(n: f64) -> String {
    if n.is_nan() {
        "-".to_string()
    } else if n.abs() >= 1000.0 || (n.abs() < 0.01 && n != 0.0) {
        format!("{:.2e}", n)
    } else {
        format!("{:.2}", n)
    }
}

/// Phase 6: Format a p-value, clamping very small values to "<0.001".
fn format_pvalue(p: f64) -> String {
    if p < 0.001 {
        "<0.001".to_string()
    } else {
        format!("{:.3}", p)
    }
}

/// Build the header style: bg+fg when `bg_key` is not `Reset`, otherwise fg-only.
fn header_style(theme: &Theme, bg_key: &str, fg_key: &str) -> Style {
    let bg = theme.get(bg_key);
    let fg = theme.get(fg_key);
    if bg == Color::Reset {
        Style::default().fg(fg)
    } else {
        Style::default().bg(bg).fg(fg)
    }
}

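/// Renders the Distribution Analysis table: best-fit distribution, goodness-of-fit
/// p-value, Shapiro-Wilk results, CV, outliers, skewness, and kurtosis per column,
/// with color coding to flag poor fits and unusual values.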
fn render_distribution_table(
    results: &AnalysisResults,
    table_state: &mut TableState,
    column_offset: usize,
    area: Rect,
    buf: &mut Buffer,
    theme: &Theme,
) {
    if results.distribution_analyses.is_empty() {
        Paragraph::new("No numeric columns for distribution analysis")
            .centered()
            .render(area, buf);
        return;
    }

    // Column headers for width calculation (excluding "Column" which will be locked)
    // Phase 6: Add P-value column after Distribution
    let column_names = [
        "Distribution",
        "P-value",
        "Shapiro-Wilk",
        "SW p-value",
        "CV",
        "Outliers",
        "Skewness",
        "Kurtosis",
    ];
    let num_stats = column_names.len();

    // Calculate column widths based on header names and content (minimal spacing)
    // Note: ratatui Table adds 1 space between columns by default, so we don't add extra padding
    let mut min_col_widths: Vec<u16> = column_names
        .iter()
        .map(|name| name.chars().count() as u16) // header length (no extra padding - table handles spacing)
        .collect();

    // Calculate column name width (for locked column)
    let header_text = "Column";
    let header_len = header_text.chars().count() as u16;
    let max_col_name_len = results
        .distribution_analyses
        .iter()
        .map(|da| da.column_name.chars().count() as u16)
        .max()
        .unwrap_or(header_len);
    let locked_col_width = max_col_name_len.max(header_len).max(10);

    // Scan all data to find maximum width needed for each column (excluding Column)
    for dist_analysis in &results.distribution_analyses {
        // Outlier count with percentage
        let outlier_text = if dist_analysis.outliers.total_count > 0 {
            format!(
                "{} ({:.1}%)",
                dist_analysis.outliers.total_count, dist_analysis.outliers.percentage
            )
        } else {
            "0 (0.0%)".to_string()
        };

        // Shapiro-Wilk statistic and p-value formatting
        let sw_stat_text = dist_analysis
            .characteristics
            .shapiro_wilk_stat
            .map(|s| format!("{:.3}", s))
            .unwrap_or_else(|| "N/A".to_string());
        let sw_pvalue_text = dist_analysis
            .characteristics
            .shapiro_wilk_pvalue
            .map(|p| format!("{:.3}", p))
            .unwrap_or_else(|| "N/A".to_string());

        // Phase 6: Add p-value to column values
        let pvalue_text = format_pvalue(dist_analysis.confidence);

        // Update minimum widths based on content (skip column name)
        let col_values = [
            format!("{}", dist_analysis.distribution_type),
            pvalue_text.clone(),
            sw_stat_text.clone(),
            sw_pvalue_text.clone(),
            format!(
                "{:.4}",
                dist_analysis.characteristics.coefficient_of_variation
            ),
            outlier_text.clone(),
            format_num(dist_analysis.characteristics.skewness),
            format_num(dist_analysis.characteristics.kurtosis),
        ];

        for (idx, value) in col_values.iter().enumerate() {
            let value_len = value.chars().count() as u16;
            let header_len = column_names[idx].chars().count() as u16;
            min_col_widths[idx] = min_col_widths[idx].max(value_len).max(header_len);
        }
    }

    // Calculate which columns can fit (similar to describe table)
    let column_spacing = 1u16;
    let available_width = area
        .width
        .saturating_sub(locked_col_width)
        .saturating_sub(column_spacing); // Space between locked column and first stat column

    // Determine which statistics to show (column_offset refers to stat columns, not column name)
    let start_stat = column_offset.min(num_stats.saturating_sub(1));

    // Calculate how many stat columns can fit starting from start_stat
    let mut used_width = 0u16;
    let mut max_visible_stats = 0;

    for width in min_col_widths
        .iter()
        .skip(start_stat)
        .take(num_stats - start_stat)
    {
        let spacing_needed = if max_visible_stats > 0 {
            column_spacing
        } else {
            0
        };
        let total_needed = spacing_needed + width;

        if used_width + total_needed <= available_width {
            used_width += total_needed;
            max_visible_stats += 1;
        } else {
            break;
        }
    }

    max_visible_stats = max_visible_stats.max(1); // At least show 1 column
    let end_stat = (start_stat + max_visible_stats).min(num_stats);
    let visible_stats: Vec<usize> = (start_stat..end_stat).collect();

    if visible_stats.is_empty() {
        return;
    }

    let mut rows = Vec::new();

    let mut header_cells = vec![Cell::from("Column").style(Style::default())];
    for &stat_idx in &visible_stats {
        header_cells.push(Cell::from(column_names[stat_idx]).style(Style::default()));
    }
    let header_row_style = header_style(theme, "controls_bg", "table_header");
    let header_row = Row::new(header_cells).style(header_row_style);
    for dist_analysis in &results.distribution_analyses {
        // Color coding for distribution type based on fit quality only
        // Green = good fit (>0.75), Yellow = moderate (0.5-0.75), Red = poor (<0.5)
        let type_color = if dist_analysis.fit_quality > 0.75 {
            theme.get("distribution_normal")
        } else if dist_analysis.fit_quality > 0.5 {
            theme.get("distribution_skewed")
        } else {
            theme.get("outlier_marker")
        };

        // Outlier count with percentage
        let outlier_text = if dist_analysis.outliers.total_count > 0 {
            format!(
                "{} ({:.1}%)",
                dist_analysis.outliers.total_count, dist_analysis.outliers.percentage
            )
        } else {
            "0 (0.0%)".to_string()
        };

        // Relaxed outlier color thresholds - red only for very high percentages that might indicate data errors
        let outlier_style = if dist_analysis.outliers.percentage > 20.0 {
            // Red: very high outlier percentage (>20%) - might indicate data errors
            Style::default().fg(theme.get("outlier_marker"))
        } else if dist_analysis.outliers.percentage > 5.0 {
            // Yellow for moderate outliers (5-20%)
            Style::default().fg(theme.get("distribution_skewed"))
        } else {
            // Default (white) for low outlier percentages (0-5%)
            Style::default()
        };

        // Get skewness and kurtosis values for styling
        let skewness_value = dist_analysis.characteristics.skewness.abs();
        let kurtosis_value = dist_analysis.characteristics.kurtosis;

        // Skewness color coding: similar to describe table
        let skewness_style = if skewness_value >= 3.0 {
            Style::default().fg(theme.get("outlier_marker"))
        } else if skewness_value >= 1.0 {
            Style::default().fg(theme.get("distribution_skewed"))
        } else {
            Style::default()
        };

        // Kurtosis color coding: 3.0 is normal, high/low is notable
        let kurtosis_style = if (kurtosis_value - 3.0).abs() >= 3.0 {
            Style::default().fg(theme.get("outlier_marker"))
        } else if (kurtosis_value - 3.0).abs() >= 1.0 {
            Style::default().fg(theme.get("distribution_skewed"))
        } else {
            Style::default()
        };

        // Format p-value with color coding
        // Green = good (>0.05), Yellow = moderate (0.01-0.05), Red = poor (≤0.01)
        let pvalue_text = format_pvalue(dist_analysis.confidence);
        let pvalue_style = if dist_analysis.confidence > 0.05 {
            Style::default().fg(theme.get("distribution_normal"))
        } else if dist_analysis.confidence > 0.01 {
            Style::default().fg(theme.get("distribution_skewed"))
        } else {
            Style::default().fg(theme.get("outlier_marker"))
        };

        // Shapiro-Wilk statistic and p-value formatting
        let sw_stat_text = dist_analysis
            .characteristics
            .shapiro_wilk_stat
            .map(|s| format!("{:.3}", s))
            .unwrap_or_else(|| "N/A".to_string());
        let sw_pvalue_text = dist_analysis
            .characteristics
            .shapiro_wilk_pvalue
            .map(|p| format!("{:.3}", p))
            .unwrap_or_else(|| "N/A".to_string());

        // Color coding for SW p-value: same semantics as p-value column
        // Green = normal (>0.05), Yellow = moderate (0.01-0.05), Red = non-normal (≤0.01)
        let sw_pvalue_style = dist_analysis
            .characteristics
            .shapiro_wilk_pvalue
            .map(|p| {
                if p > 0.05 {
                    Style::default().fg(theme.get("distribution_normal"))
                } else if p > 0.01 {
                    Style::default().fg(theme.get("distribution_skewed"))
                } else {
                    Style::default().fg(theme.get("outlier_marker"))
                }
            })
            .unwrap_or_default();

        // Build row with locked column name + visible stat values
        // Use explicit text_primary so column names stay visible (avoids black-on-black)
        let mut cells = vec![Cell::from(dist_analysis.column_name.as_str())
            .style(Style::default().fg(theme.get("text_primary")))];

        // Add visible statistic values
        for &stat_idx in &visible_stats {
            let cell = match stat_idx {
                0 => Cell::from(format!("{}", dist_analysis.distribution_type))
                    .style(Style::default().fg(type_color)),
                1 => Cell::from(pvalue_text.clone()).style(pvalue_style),
                2 => Cell::from(sw_stat_text.clone()),
                3 => Cell::from(sw_pvalue_text.clone()).style(sw_pvalue_style),
                4 => Cell::from(format!(
                    "{:.4}",
                    dist_analysis.characteristics.coefficient_of_variation
                ))
                .style(
                    if dist_analysis.characteristics.coefficient_of_variation > 1.0 {
                        Style::default().fg(theme.get("distribution_skewed")) // High variability
                    } else {
                        Style::default()
                    },
                ),
                5 => Cell::from(outlier_text.clone()).style(outlier_style),
                6 => Cell::from(format_num(dist_analysis.characteristics.skewness))
                    .style(skewness_style),
                7 => Cell::from(format_num(dist_analysis.characteristics.kurtosis))
                    .style(kurtosis_style),
                _ => Cell::from(""),
            };
            cells.push(cell);
        }

        rows.push(Row::new(cells));
    }

    let mut constraints = vec![Constraint::Length(locked_col_width)];
    for &stat_idx in &visible_stats {
        constraints.push(Constraint::Length(min_col_widths[stat_idx]));
    }

    if visible_stats.len() == num_stats && constraints.len() > 1 {
        let last_idx = constraints.len() - 1;
        constraints[last_idx] = Constraint::Fill(1);
    }

    let table = Table::new(rows, constraints)
        .header(header_row)
        .row_highlight_style(Style::default().add_modifier(Modifier::REVERSED));

    StatefulWidget::render(table, area, buf, table_state);
}

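/// Renders the correlation matrix with a locked row-header column, horizontal
/// column scrolling, and highlighting for the selected cell, row, and column.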
fn render_correlation_matrix(
    results: &AnalysisResults,
    table_state: &mut TableState,
    selected_cell: &Option<(usize, usize)>,
    column_offset: usize,
    area: Rect,
    buf: &mut Buffer,
    theme: &Theme,
) {
    let correlation_matrix = match &results.correlation_matrix {
        Some(cm) => cm,
        None => {
            Paragraph::new("No correlation matrix available (need at least 2 numeric columns)")
                .centered()
                .render(area, buf);
            return;
        }
    };

    if correlation_matrix.columns.is_empty() {
        Paragraph::new("No numeric columns for correlation matrix")
            .centered()
            .render(area, buf);
        return;
    }

    let n = correlation_matrix.columns.len();

    // Calculate column widths - ensure they're wide enough for content
    let row_header_width = 20u16;
    let cell_width = 12u16; // Wide enough for "-1.00" format
    let column_spacing = 1u16; // Table widget adds 1 space between columns

    // Calculate how many columns can fit
    let available_width = area.width.saturating_sub(row_header_width);
    let mut used_width = 0u16;
    let mut visible_cols = 0usize;

    // Start from column_offset
    let start_col = column_offset.min(n.saturating_sub(1));

    for _col_idx in start_col..n {
        let needed = if visible_cols > 0 {
            column_spacing + cell_width
        } else {
            cell_width
        };

        if used_width + needed <= available_width {
            used_width += needed;
            visible_cols += 1;
        } else {
            break;
        }
    }

    visible_cols = visible_cols.max(1);
    let end_col = (start_col + visible_cols).min(n);

    let (selected_row, selected_col) = selected_cell.unwrap_or((n, n));

    let header_row_style = header_style(theme, "controls_bg", "table_header");
    let dim_header_style = header_style(theme, "controls_bg", "table_header");

    let mut header_cells = vec![Cell::from("")];
    for j in start_col..end_col {
        let col_name = &correlation_matrix.columns[j];
        let is_selected_col = selected_cell.is_some() && j == selected_col;
        let cell_style = if is_selected_col {
            dim_header_style
        } else {
            header_row_style
        };
        header_cells.push(Cell::from(col_name.as_str()).style(cell_style));
    }

    let header_row = Row::new(header_cells).style(header_row_style);

    // Data rows: build every row and let the Table widget handle vertical
    // scrolling through the TableState offset.
    let mut rows = Vec::new();
    for (i, col_name) in correlation_matrix.columns.iter().enumerate() {
        // Determine if this is the selected row
        let is_selected_row = selected_cell.is_some() && i == selected_row;

        // Row header cell - dim highlight if selected row
        let row_header_style = if is_selected_row {
            Style::default().bg(theme.get("surface"))
        } else {
            Style::default()
        };
        let mut cells = vec![Cell::from(col_name.as_str()).style(row_header_style)];

        for col_idx in start_col..end_col {
            let correlation = correlation_matrix.correlations[i][col_idx];
            let text_color = get_correlation_color(correlation, theme);

            let cell_text = if i == col_idx {
                "1.00".to_string()
            } else {
                format!("{:.2}", correlation)
            };

            let is_selected_cell =
                selected_cell.is_some() && i == selected_row && col_idx == selected_col;
            let is_in_selected_col = selected_cell.is_some() && col_idx == selected_col;

            let cell_style = if is_selected_cell {
                // Selected cell: use bright background with inverted text for visibility
                Style::default()
                    .fg(theme.get("text_inverse"))
                    .bg(theme.get("modal_border_active"))
            } else if is_selected_row || is_in_selected_col {
                // Selected row or column: dim background with colored text
                Style::default().fg(text_color).bg(theme.get("surface"))
            } else {
                // Normal cell: just text color
                Style::default().fg(text_color)
            };

            cells.push(Cell::from(cell_text).style(cell_style));
        }

        let row_style = if is_selected_row {
            Style::default().bg(theme.get("surface"))
        } else {
            Style::default()
        };

        rows.push(Row::new(cells).style(row_style));
    }

    // Build constraints - fixed widths to prevent clipping
    let mut constraints = vec![Constraint::Length(row_header_width)];
    for _ in 0..visible_cols {
        constraints.push(Constraint::Length(cell_width));
    }

    let last_idx = constraints.len().saturating_sub(1);
    if visible_cols == n && constraints.len() > 1 {
        constraints[last_idx] = Constraint::Fill(1);
    }

    let table = Table::new(rows, constraints)
        .header(header_row)
        .column_spacing(1);

    StatefulWidget::render(table, area, buf, table_state);
}

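/// Maps a correlation coefficient to a theme color: dimmed near zero, normal text
/// for weak correlations, and distinct colors for positive vs. negative correlations.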
fn get_correlation_color(correlation: f64, theme: &Theme) -> Color {
    let abs_corr = correlation.abs();

    if abs_corr < 0.05 {
        // No correlation (close to 0) - dimmed
        theme.get("dimmed")
    } else if abs_corr < 0.3 {
        // Low correlation - normal text
        theme.get("text_primary")
    } else if correlation > 0.0 {
        // Positive correlation - keybind hints color (UI element, not chart)
        theme.get("keybind_hints")
    } else {
        // Negative correlation - error/warning color
        theme.get("outlier_marker")
    }
}

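/// Renders the distribution selector: candidate distributions sorted by stored
/// goodness-of-fit p-value (best fit first), with the current selection highlighted.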
fn render_distribution_selector(
    dist: &DistributionAnalysis,
    selected_dist: DistributionType,
    selector_state: &mut TableState,
    focus: AnalysisFocus,
    area: Rect,
    buf: &mut Buffer,
    theme: &Theme,
) {
    let distributions = [
        ("Normal", DistributionType::Normal),
        ("Log-Normal", DistributionType::LogNormal),
        ("Uniform", DistributionType::Uniform),
        ("Power Law", DistributionType::PowerLaw),
        ("Exponential", DistributionType::Exponential),
        ("Beta", DistributionType::Beta),
        ("Gamma", DistributionType::Gamma),
        ("Chi-Squared", DistributionType::ChiSquared),
        ("Student's t", DistributionType::StudentsT),
        ("Poisson", DistributionType::Poisson),
        ("Bernoulli", DistributionType::Bernoulli),
        ("Binomial", DistributionType::Binomial),
        ("Geometric", DistributionType::Geometric),
        ("Weibull", DistributionType::Weibull),
    ];

    // Use stored p-values from the initial analysis - no recalculation needed.
    // These were calculated during infer_distribution() with the same data and method.
    let mut distribution_scores: Vec<(usize, &str, DistributionType, f64)> = distributions
        .iter()
        .enumerate()
        .map(|(idx, (name, dist_type))| {
            let p_value = dist
                .all_distribution_pvalues
                .get(dist_type)
                .copied()
                .unwrap_or_else(|| {
                    // Fallback: if not in stored values (e.g., Geometric skipped), use placeholder
                    if *dist_type == DistributionType::Geometric {
                        0.01 // Placeholder to prevent freezes
                    } else {
                        0.0 // Default for untested distributions
                    }
                });
            (idx, *name, *dist_type, p_value)
        })
        .collect();

    // Sort by p-value (descending) - best fit on top
    distribution_scores.sort_by(|a, b| b.3.partial_cmp(&a.3).unwrap_or(std::cmp::Ordering::Equal));

    // Find position of selected distribution in sorted list
    let selected_pos = distribution_scores
        .iter()
        .position(|(_, _, dt, _)| *dt == selected_dist)
        .unwrap_or(0);

    // Only sync selector state when absolutely necessary to prevent jumping during navigation
    // Trust the user's navigation state - only fix if selection is uninitialized or out of bounds
    let current_selection = selector_state.selected();
    if current_selection.is_none() {
        // Initial state: set to selected distribution position
        selector_state.select(Some(selected_pos));
    } else if let Some(current_idx) = current_selection {
        // Only fix if index is out of bounds - otherwise trust the current selection
        // This prevents the sync logic from interfering with user navigation
        if current_idx >= distribution_scores.len() {
            selector_state.select(Some(selected_pos));
        }
        // Otherwise, keep current selection (user is navigating or selection is valid)
    }

    // Create table rows from sorted list
    let rows: Vec<Row> = distribution_scores
        .iter()
        .enumerate()
        .map(|(sorted_idx, (_, name, _dist_type, p_value))| {
            let is_focused = focus == AnalysisFocus::DistributionSelector
                && selector_state.selected() == Some(sorted_idx);

            let name_style = if is_focused {
                header_style(theme, "controls_bg", "table_header")
            } else {
                Style::default().fg(theme.get("text_primary"))
            };

            // Style based on p-value
            let pvalue_style = if *p_value > 0.05 {
                Style::default().fg(theme.get("distribution_normal")) // Good fit
            } else if *p_value > 0.01 {
                Style::default().fg(theme.get("distribution_skewed")) // Marginal fit
            } else {
                Style::default().fg(theme.get("outlier_marker")) // Poor fit
            };

            Row::new(vec![
                Cell::from(name.to_string()).style(name_style),
                Cell::from(format_pvalue(*p_value)).style(pvalue_style),
            ])
        })
        .collect();

    let h = header_style(theme, "controls_bg", "table_header");
    let header = Row::new(vec![
        Cell::from("Name").style(h),
        Cell::from("P-value").style(h),
    ]);

    let table = Table::new(
        rows,
        vec![
            Constraint::Fill(1),   // Name column takes remaining space
            Constraint::Length(7), // P-value column: "<0.001" or "0.000" = 7 chars max
        ],
    )
    .header(header)
    .block(
        Block::default()
            .title("Distribution")
            .borders(Borders::ALL)
            .border_type(BorderType::Rounded)
            .border_style(Style::default().fg(theme.get("sidebar_border"))),
    )
    .row_highlight_style(Style::default().add_modifier(Modifier::REVERSED));

    StatefulWidget::render(table, area, buf, selector_state);
}

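/// Inputs for rendering the histogram comparison chart (data vs. the selected
/// theoretical distribution), including shared axis sizing and scale options.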
struct HistogramRenderConfig<'a> {
    dist: &'a DistributionAnalysis,
    dist_type: DistributionType,
    area: Rect,
    shared_y_axis_label_width: u16,
    theme: &'a Theme,
    unified_x_range: Option<(f64, f64)>,
    histogram_scale: HistogramScale,
}

1350fn render_distribution_settings(
1351    histogram_scale: HistogramScale,
1352    log_scale_unavailable: bool,
1353    area: Rect,
1354    buf: &mut Buffer,
1355    theme: &Theme,
1356) {
1357    let block = Block::default()
1358        .title("Settings")
1359        .borders(Borders::ALL)
1360        .border_type(BorderType::Rounded)
1361        .border_style(Style::default().fg(theme.get("sidebar_border")));
1362
1363    // Settings content: Scale option
1364    let scale_label = "Scale:";
1365    let (scale_value, scale_value_style) = if log_scale_unavailable {
1366        // Log scale requested but can't be used (e.g., negative values)
1367        // Show "Linear" in warning color to indicate fallback
1368        ("Linear", Style::default().fg(theme.get("warning")))
1369    } else {
1370        match histogram_scale {
1371            HistogramScale::Linear => ("Linear", Style::default().fg(theme.get("text_primary"))),
1372            HistogramScale::Log => ("Log", Style::default().fg(theme.get("text_primary"))),
1373        }
1374    };
1375
1376    // Layout for settings content (inside block)
1377    let inner_area = block.inner(area);
1378    let settings_layout = Layout::default()
1379        .direction(Direction::Vertical)
1380        .constraints([
1381            Constraint::Length(1), // Scale setting line
1382            Constraint::Fill(1),   // Remaining space
1383        ])
1384        .split(inner_area);
1385
1386    // Scale setting: label on left, value on right
1387    let scale_layout = Layout::default()
1388        .direction(Direction::Horizontal)
1389        .constraints([
1390            Constraint::Length(scale_label.chars().count() as u16 + 1), // Label + spacing
1391            Constraint::Fill(1),                                        // Value
1392        ])
1393        .split(settings_layout[0]);
1394
1395    let scale_label_style = Style::default().fg(theme.get("text_secondary"));
1396
1397    Paragraph::new(scale_label)
1398        .style(scale_label_style)
1399        .render(scale_layout[0], buf);
1400
1401    Paragraph::new(scale_value)
1402        .style(scale_value_style)
1403        .render(scale_layout[1], buf);
1404
1405    block.render(area, buf);
1406}
1407
1408fn render_sidebar(
1409    area: Rect,
1410    buf: &mut Buffer,
1411    sidebar_state: &mut TableState,
1412    selected_tool: Option<AnalysisTool>,
1413    focus: AnalysisFocus,
1414    theme: &Theme,
1415) {
1416    let tools = [
1417        ("Describe", AnalysisTool::Describe),
1418        ("Distribution Analysis", AnalysisTool::DistributionAnalysis),
1419        ("Correlation Matrix", AnalysisTool::CorrelationMatrix),
1420    ];
1421
1422    let text_primary = theme.get("text_primary");
1423    // Use REVERSED for focused row (like main table) so selection is always visible,
1424    // even when controls_bg is "default"/none.
1425    let focused_style = Style::default().add_modifier(Modifier::REVERSED);
1426
1427    let items: Vec<ListItem> = tools
1428        .iter()
1429        .enumerate()
1430        .map(|(idx, (name, tool))| {
1431            let is_selected = selected_tool == Some(*tool);
1432            let is_focused =
1433                focus == AnalysisFocus::Sidebar && sidebar_state.selected() == Some(idx);
1434            let prefix = if is_selected { "> " } else { "  " };
1435            let style = if is_focused {
1436                focused_style
1437            } else {
1438                Style::default().fg(text_primary)
1439            };
1440            ListItem::new(format!("{}{}", prefix, name)).style(style)
1441        })
1442        .collect();
1443
1444    let border_color = if focus == AnalysisFocus::Sidebar {
1445        theme.get("modal_border_active")
1446    } else {
1447        theme.get("modal_border")
1448    };
1449    let block = Block::default()
1450        .title("Analysis Tools")
1451        .borders(Borders::ALL)
1452        .border_type(BorderType::Rounded)
1453        .border_style(Style::default().fg(border_color));
1454
1455    let list = List::new(items).block(block);
1456
1457    Widget::render(list, area, buf);
1458}
1459
1460fn render_distribution_histogram(config: HistogramRenderConfig, buf: &mut Buffer) {
1461    // Use BarChart widget to show histogram comparing data vs theoretical distribution
1462    // Use fixed-width bins that span both data range and theoretical distribution range
1463    let HistogramRenderConfig {
1464        dist,
1465        dist_type,
1466        area,
1467        shared_y_axis_label_width,
1468        theme,
1469        unified_x_range,
1470        histogram_scale,
1471    } = config;
1472    let sorted_data = &dist.sorted_sample_values;
1473
    if sorted_data.len() < 3 {
1475        Paragraph::new("Insufficient data for histogram")
1476            .centered()
1477            .render(area, buf);
1478        return;
1479    }
1480
1481    let n = sorted_data.len();
1482
    // Determine bin range from the full data range (min to max); when the caller provides a
    // unified X range it overrides this below, keeping the histogram and Q-Q plot in sync
1486    let data_min = sorted_data[0];
1487    let data_max = sorted_data[n - 1];
1488    let data_range = data_max - data_min;
1489
1490    if data_range <= 0.0 {
1491        // Constant data: all values are the same
1492        Paragraph::new("Constant data: all values are identical")
1493            .centered()
1494            .render(area, buf);
1495        return;
1496    }
1497
1498    // Use unified X-axis range (strict data range, no padding or extensions)
1499    // This keeps both Q-Q plot and histogram in sync and ensures log scale works correctly
1500    let (hist_min, hist_max, hist_range) = if let Some((unified_min, unified_max)) = unified_x_range
1501    {
1502        // Use unified range directly - it's already the strict data range
1503        let range = unified_max - unified_min;
1504        (unified_min, unified_max, range)
1505    } else {
1506        // Fallback: use actual data range (shouldn't happen if unified_x_range is always provided)
1507        (data_min, data_max, data_range)
1508    };
1509
1510    // Calculate dynamic number of bins based on available width
1511    // This ensures bars fill the horizontal space and look dense at all widths
1512
1513    let y_axis_gap = 1u16; // Minimal gap between labels and plot area (needed to prevent bars from extending outside)
1514    let total_y_axis_space = shared_y_axis_label_width + y_axis_gap;
1515
1516    // Calculate available width for bars - must match Chart widget's plot area exactly
1517    // Chart widget reserves space for Y-axis labels internally, using remaining width for plot
1518    let available_width = area.width.saturating_sub(total_y_axis_space);
1519    let bar_gap = 1u16;
1520    let group_gap = 1u16;
1521    let gap_width = bar_gap + group_gap;
1522
    // Target bar width: aim for roughly 6-8 terminal cells per bar for good density
    // Calculate the optimal number of bins to fill the available width
    // Formula: available_width = num_bins * bar_width + (num_bins - 1) * gap_width
    // Rearranging: num_bins = (available_width + gap_width) / (bar_width + gap_width)
    let target_bar_width = 7.0; // Target bar width in terminal cells
1528    let optimal_num_bins = ((available_width as f64 + gap_width as f64)
1529        / (target_bar_width + gap_width as f64)) as usize;
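    // e.g., with available_width = 80 and gap_width = 2: (80 + 2) / (7 + 2) ≈ 9 bins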
1530
1531    // Clamp to reasonable bounds: minimum 5 bins, maximum 60 bins
1532    // Fewer bins for very narrow displays, more bins for wide displays
1533    // Increased max to 60 to better utilize ultrawide displays
1534    let num_bins = optimal_num_bins.clamp(5, 60);
1535
1536    // Use log-scale binning if user has selected log scale and data is positive
1537    // Log-scale binning is standard practice for power law distributions and wide dynamic ranges
1538    // Check actual data values, not histogram range (which may include padding or theoretical bounds)
1539    let all_data_positive = sorted_data.iter().all(|&v| v > 0.0);
1540    // For log scale, ensure hist_min is positive (adjust if needed)
1541    let (log_hist_min, log_hist_max) =
1542        if matches!(histogram_scale, HistogramScale::Log) && all_data_positive {
1543            // Use actual data min/max for log scale to avoid issues with padding or theoretical bounds
1544            let actual_min = sorted_data[0];
1545            let actual_max = sorted_data[sorted_data.len() - 1];
1546            // Ensure minimum is positive for log scale
1547            if actual_min > 0.0 {
1548                (actual_min, actual_max)
1549            } else {
1550                // Can't use log scale if data includes 0
1551                (hist_min, hist_max)
1552            }
1553        } else {
1554            (hist_min, hist_max)
1555        };
1556    let use_log_scale = matches!(histogram_scale, HistogramScale::Log)
1557        && all_data_positive
1558        && log_hist_min > 0.0
1559        && log_hist_max > log_hist_min;
1560
1561    let (bin_boundaries, bin_width): (Vec<f64>, f64) = if use_log_scale {
1562        // Log-scale binning: bins with equal width in log space
1563        // This ensures each bin represents roughly equal multiplicative range
1564        // Use adjusted range based on actual data values
1565        let log_min = log_hist_min.ln();
1566        let log_max = log_hist_max.ln();
1567        let log_range = log_max - log_min;
1568        let log_bin_width = log_range / num_bins as f64;
1569
1570        let boundaries: Vec<f64> = (0..=num_bins)
1571            .map(|i| {
1572                let log_value = log_min + (i as f64) * log_bin_width;
1573                log_value.exp()
1574            })
1575            .collect();
1576
1577        // For log scale, calculate average bin width for use in theoretical PDF calculations
1578        // This is approximate but needed for compatibility
1579        let log_range_linear = log_hist_max - log_hist_min;
1580        let avg_bin_width = log_range_linear / num_bins as f64;
1581        (boundaries, avg_bin_width)
1582    } else {
        // Linear binning: bins with equal width on the data scale
1584        let bin_width = hist_range / num_bins as f64;
1585        let boundaries: Vec<f64> = (0..=num_bins)
1586            .map(|i| hist_min + (i as f64) * bin_width)
1587            .collect();
1588        (boundaries, bin_width)
1589    };
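    // e.g., log-scale bins over [1, 1000] with num_bins = 3 give boundaries 1, 10, 100, 1000,
    // so each bin covers one decade (equal multiplicative width)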
1590
1591    // Count data points in each bin
1592    let mut data_bin_counts = vec![0; num_bins];
1593    for &val in sorted_data {
1594        for (i, boundaries) in bin_boundaries.windows(2).enumerate().take(num_bins) {
1595            if val >= boundaries[0]
1596                && (val < boundaries[1] || (i == num_bins - 1 && val <= boundaries[1]))
1597            {
1598                data_bin_counts[i] += 1;
1599                break;
1600            }
1601        }
1602    }
1603
1604    // Calculate theoretical bin probabilities using CDF for the selected distribution
1605    let theory_probs = crate::statistics::calculate_theoretical_bin_probabilities(
1606        dist,
1607        dist_type,
1608        &bin_boundaries,
1609    );
1610
1611    // Convert probabilities to expected counts
1612    let theory_bin_counts: Vec<f64> = theory_probs.iter().map(|&prob| prob * n as f64).collect();
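    // e.g., a bin probability of 0.08 with n = 500 samples gives an expected count of 40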
1613
1614    // Normalize values for display (find the maximum for scaling)
1615    let max_data = data_bin_counts.iter().cloned().fold(0, usize::max);
1616    let max_theory = theory_bin_counts.iter().cloned().fold(0.0, f64::max);
1617    let global_max = max_data.max(max_theory as usize).max(1) as f64;
1618
    // Use the shared label width calculated in the caller so the histogram and the Q-Q plot
    // use the same padding for alignment; total_y_axis_space above already reflects it
    let y_axis_label_width = shared_y_axis_label_width;
1625
1626    // Bin centers for x-axis positioning (value at center of each bin)
1627    let bin_centers: Vec<f64> = (0..num_bins)
1628        .map(|i| (bin_boundaries[i] + bin_boundaries[i + 1]) / 2.0)
1629        .collect();
1630
1631    // Create data bars - use BarChart for actual bars
1632    let mut data_bars = Vec::new();
1633
    for &data_count in &data_bin_counts {
1635        // Calculate normalized bar height (0-100 scale for BarChart)
1636        let data_height = if global_max > 0.0 {
1637            ((data_count as f64 / global_max) * 100.0) as u64
1638        } else {
1639            0
1640        };
1641
1642        // No bar labels - Chart widget overlay provides x-axis labels
1643        // This prevents duplicate labels overlapping with Chart's x-axis labels
1644        let data_bar = Bar::default()
1645            .value(data_height)
1646            // Remove text_value to prevent cyan count labels from appearing on bars
1647            // Remove .label() to prevent bar labels from overlapping Chart's x-axis labels
1648            .style(Style::default().fg(theme.get("primary_chart_series_color")));
1649
1650        data_bars.push(data_bar);
1651    }
1652
1653    // Calculate dynamic bar width to use available space
1654    // num_bins is dynamic, so recalculate bar_width to fill the space optimally
1655    // Ensure bars extend all the way to the right edge by using all available width
1656    let total_gaps = (num_bins - 1) as u16 * gap_width;
1657    let total_bar_space = available_width.saturating_sub(total_gaps);
1658
    // Calculate bar width to fill the available space - ensure a minimum width of 1 cell
    // Use floor so we don't exceed the available space; leftover space is redistributed below
1661    let calculated_bar_width = (total_bar_space as f64 / num_bins as f64).floor() as u16;
1662    let bar_width = calculated_bar_width.max(1);
1663
1664    // Recalculate to ensure we're using full width - adjust if there's leftover space
1665    // This ensures bars extend all the way to the right edge without gaps
1666    let total_used_width = (bar_width * num_bins as u16) + total_gaps;
1667    let remaining_space = available_width.saturating_sub(total_used_width);
1668
1669    // If there's leftover space, distribute it to bars to fill the width completely
1670    // At large widths, ensure all space is utilized by distributing evenly
1671    let final_bar_width = if remaining_space > 0 && num_bins > 0 {
1672        // Distribute all remaining space across bars
1673        // Calculate exact extra width per bar to fill completely
1674        let extra_per_bar = remaining_space / num_bins as u16;
1675        bar_width + extra_per_bar
1676    } else {
1677        bar_width
1678    };
1679
    // Render data bars using BarChart in a sub-area that matches the Chart widget's inner
    // plot area, so the bars align with the theoretical distribution overlay. The Chart
    // widget reserves space for the Y-axis labels, one line of x-axis labels, a one-line
    // title, and one line of top padding (from Block padding), so the same space is
    // subtracted here. There are no borders, so the area is used directly.
1689    let title_height = 1u16;
1690    let top_padding = 1u16; // Extra padding below title (from Block padding)
1691    let x_axis_label_height = 1u16;
1692    let chart_inner_top = area.top() + title_height + top_padding; // Start below title and padding
1693    let chart_inner_height = area
1694        .height
1695        .saturating_sub(title_height)
1696        .saturating_sub(top_padding)
1697        .saturating_sub(x_axis_label_height); // Reserve space for title, padding, and x-axis labels
1698
1699    // Shift bar plot area right by 1.5 bar widths so bars align to the right side of their bins
1700    // This ensures proper alignment with the theoretical distribution overlay
1701    // BarChart renders bars starting from the left edge, so shifting the area right will
1702    // make the bars' right edges align with the right edges of their bins
1703    let bar_width_offset = final_bar_width + (final_bar_width / 2); // 1.5 bar widths
1704    let bar_plot_left = area
1705        .left()
1706        .saturating_add(total_y_axis_space)
1707        .saturating_add(bar_width_offset); // Shift right by 1.5 bar widths for right alignment
1708    let bar_plot_width = available_width + bar_width_offset; // Extend width to accommodate shift
1709
1710    let bar_plot_area = Rect::new(
1711        bar_plot_left,      // Shifted right for right-aligned bars
1712        chart_inner_top,    // Start below title
1713        bar_plot_width,     // Extended width to accommodate shift
1714        chart_inner_height, // Use calculated height that accounts for title
1715    );
1716
1717    let barchart = BarChart::default()
1718        .block(Block::default()) // No borders in sub-area - borders handled separately
1719        .data(BarGroup::default().bars(&data_bars))
1720        .bar_width(final_bar_width)
1721        .bar_gap(bar_gap)
1722        .group_gap(group_gap);
1723
1724    // Render bar chart to sub-area matching Chart's plot area (excluding x-axis label space)
1725    // Bars are now right-aligned within their bins
1726    barchart.render(bar_plot_area, buf);
1727
1728    // No border - chart renders without surrounding box
1729
1730    // Overlay theory distribution as dense scatter plot (dot plot) on top of bar chart
1731    // Evaluate theoretical PDF directly at each x point for accurate smooth curve
1732    // This ensures the theoretical distribution shows the correct shape (e.g., bell curve for normal)
1733    // Use very dense sampling for smooth continuous appearance
1734    // Braille markers create 2x4 dot patterns per character, need high density
1735    let num_samples = (available_width as usize * 15).clamp(1500, 10000); // Very dense for smooth Braille lines
1736
1737    let theory_points: Vec<(f64, f64)> = if num_bins > 0
1738        && !theory_bin_counts.is_empty()
1739        && num_samples > 1
1740        && hist_range > 0.0
1741        && dist.characteristics.std_dev > 0.0
1742    {
        // Distribution parameters used by the PDF evaluations below
1745        let mean = dist.characteristics.mean;
1746        let std = dist.characteristics.std_dev;
1747
        // Sample across the full range, but use a small epsilon to avoid exact boundary values
        // that cause issues with domain-restricted distributions (e.g., Gamma at x=0, Beta at x=0 or x=1).
        // The epsilon is very small (0.1% of range) so the curve still extends nearly to the edges.
1752        let epsilon = hist_range * 0.001; // 0.1% of range - small enough to be visually negligible
1753        let effective_min = hist_min + epsilon;
1754        let effective_max = hist_max - epsilon;
1755        let effective_range = effective_max - effective_min;
1756
1757        (0..num_samples)
1758            .map(|i| {
1759                // Sample x values across the histogram range, avoiding exact boundaries
1760                let x = if num_samples > 1 && effective_range > 0.0 {
1761                    effective_min + (i as f64 / (num_samples - 1) as f64) * effective_range
1762                } else if num_samples > 1 {
1763                    // Fallback if range is too small
1764                    hist_min + (i as f64 / (num_samples - 1) as f64) * hist_range
1765                } else {
1766                    (hist_min + hist_max) / 2.0
1767                };
1768
1769                // Calculate theoretical PDF at x value, then convert to expected count
1770                // PDF gives us density (probability per unit), convert to count: PDF(x) * bin_width * n
1771                let theory_count = match dist_type {
1772                    DistributionType::Normal => {
1773                        // Normal PDF: (1 / (σ * sqrt(2π))) * exp(-0.5 * ((x - μ) / σ)²)
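                        // e.g., at x = mean with std = 1 the PDF is 1/sqrt(2π) ≈ 0.399,
                        // so the expected count there is ≈ 0.399 * bin_width * n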
1774                        let z = (x - mean) / std;
1775                        let pdf = (1.0 / (std * (2.0 * std::f64::consts::PI).sqrt()))
1776                            * (-0.5 * z * z).exp();
1777                        pdf * bin_width * n as f64
1778                    }
1779                    DistributionType::LogNormal => {
1780                        // LogNormal PDF: show theoretical distribution over [0, ∞) even if data is negative
1781                        if x > 0.0 {
1782                            let (mu, sigma) = if mean > 0.0 && std >= 0.0 {
1783                                let variance = std * std;
1784                                let sigma_sq = (1.0 + variance / (mean * mean)).ln();
1785                                let mu_val = mean.ln() - sigma_sq / 2.0;
1786                                let sigma_val = sigma_sq.sqrt();
1787                                (mu_val, sigma_val)
1788                            } else {
1789                                // Data doesn't match LogNormal: use default parameters (mu=0, sigma=1)
1790                                (0.0, 1.0)
1791                            };
1792                            let z = (x.ln() - mu) / sigma;
1793                            let pdf = (1.0 / (x * sigma * (2.0 * std::f64::consts::PI).sqrt()))
1794                                * (-0.5 * z * z).exp();
1795                            pdf * bin_width * n as f64
1796                        } else {
1797                            // LogNormal is strictly positive, return 0 for x <= 0
1798                            0.0
1799                        }
1800                    }
1801                    DistributionType::Exponential => {
1802                        // Exponential PDF: show theoretical distribution over [0, ∞) even if data is negative
1803                        if x >= 0.0 {
1804                            let lambda = if mean > 0.0 {
1805                                1.0 / mean
1806                            } else {
1807                                // Data doesn't match Exponential: use default lambda=1
1808                                1.0
1809                            };
1810                            let pdf = lambda * (-lambda * x).exp();
1811                            pdf * bin_width * n as f64
1812                        } else {
1813                            // Exponential is strictly non-negative, return 0 for x < 0
1814                            0.0
1815                        }
1816                    }
1817                    DistributionType::Uniform => {
1818                        if !sorted_data.is_empty() && x >= data_min && x <= data_max {
1819                            let data_range = data_max - data_min;
1820                            if data_range > 0.0 {
1821                                let pdf = 1.0 / data_range;
1822                                pdf * bin_width * n as f64
1823                            } else {
1824                                0.0
1825                            }
1826                        } else {
1827                            0.0
1828                        }
1829                    }
1830                    DistributionType::Gamma => {
1831                        // Gamma PDF: evaluate directly for smooth curve
1832                        // Show theoretical distribution over its valid domain [0, ∞) even if data is negative
1833                        if x > 0.0 {
1834                            let variance = std * std;
1835                            let (shape, scale) = if mean > 0.0 && variance > 0.0 {
1836                                let s = (mean * mean) / variance;
1837                                let sc = variance / mean;
1838                                if s > 0.0 && sc > 0.0 {
1839                                    (s, sc)
1840                                } else {
1841                                    // Invalid parameters: use default (exponential with scale=1)
1842                                    (1.0, 1.0)
1843                                }
1844                            } else {
1845                                // Data doesn't match Gamma (e.g., negative mean): use default parameters
1846                                // This ensures we still show the theoretical distribution shape
1847                                (1.0, 1.0)
1848                            };
1849                            let pdf = gamma_pdf(x, shape, scale);
1850                            pdf * bin_width * n as f64
1851                        } else {
1852                            // Gamma is strictly non-negative, return 0 for x <= 0
1853                            0.0
1854                        }
1855                    }
1856                    DistributionType::Geometric => {
1857                        // Geometric PMF: evaluate directly for smooth curve
1858                        if x >= 0.0 && mean > 0.0 {
1859                            let p_param = 1.0 / (mean + 1.0);
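                            // e.g., a sample mean of 4 gives p = 1/(4 + 1) = 0.2 (consistent with a
                            // geometric distribution counting failures before the first success)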
1860                            if p_param > 0.0 && p_param < 1.0 {
1861                                // Use PMF for continuous approximation
1862                                let pmf = geometric_pmf(x, p_param);
1863                                // Convert PMF to expected count: PMF * n
1864                                // Note: For discrete distributions, we use PMF directly rather than PDF * bin_width
1865                                pmf * n as f64
1866                            } else {
1867                                0.0
1868                            }
1869                        } else {
1870                            0.0
1871                        }
1872                    }
1873                    DistributionType::Weibull => {
1874                        // Weibull PDF: evaluate directly for smooth curve
1875                        if x > 0.0 && mean > 0.0 && std > 0.0 {
1876                            // Approximate shape from CV
1877                            let cv = std / mean;
1878                            let shape = if cv < 1.0 { 1.0 / cv } else { 1.0 };
                            // Scale from mean: for Weibull, E[X] = scale * Γ(1 + 1/shape);
                            // Γ(1 + 1/shape) is approximated here as 1 + 1/shape
                            let gamma_1_over_shape = 1.0 + 1.0 / shape;
                            let scale = mean / gamma_1_over_shape;
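                            // e.g., mean = 10 and std = 5 (cv = 0.5) give shape = 2 and scale ≈ 10 / 1.5 ≈ 6.67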
1882                            if shape > 0.0 && scale > 0.0 {
1883                                let pdf = weibull_pdf(x, shape, scale);
1884                                pdf * bin_width * n as f64
1885                            } else {
1886                                0.0
1887                            }
1888                        } else {
1889                            0.0
1890                        }
1891                    }
1892                    DistributionType::Beta => {
1893                        // Beta PDF: evaluate directly for smooth curve
1894                        if x > 0.0 && x < 1.0 {
1895                            let variance = std * std;
1896                            let mean_val = mean;
1897                            if mean_val > 0.0 && mean_val < 1.0 && variance > 0.0 {
1898                                let max_var = mean_val * (1.0 - mean_val);
1899                                if variance < max_var {
1900                                    // Estimate alpha and beta using method of moments
1901                                    let sum = mean_val * (1.0 - mean_val) / variance - 1.0;
1902                                    let alpha = mean_val * sum;
1903                                    let beta = (1.0 - mean_val) * sum;
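                                    // e.g., mean = 0.25 and variance = 0.01875 give sum = 9,
                                    // so alpha = 2.25 and beta = 6.75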
1904                                    if alpha > 0.0 && beta > 0.0 {
1905                                        let pdf = beta_pdf(x, alpha, beta);
1906                                        pdf * bin_width * n as f64
1907                                    } else {
1908                                        0.0
1909                                    }
1910                                } else {
1911                                    0.0
1912                                }
1913                            } else {
1914                                0.0
1915                            }
1916                        } else {
1917                            0.0
1918                        }
1919                    }
1920                    DistributionType::ChiSquared => {
1921                        // ChiSquared PDF: evaluate directly for smooth curve (uses gamma_pdf)
1922                        if x > 0.0 {
1923                            let df = mean; // For chi-squared, mean = df
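                            // e.g., a sample mean of 4 implies df = 4 (theoretical variance 2 * df = 8)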
1924                            if df > 0.0 {
1925                                let pdf = chi_squared_pdf(x, df);
1926                                pdf * bin_width * n as f64
1927                            } else {
1928                                0.0
1929                            }
1930                        } else {
1931                            0.0
1932                        }
1933                    }
1934                    DistributionType::StudentsT => {
1935                        // StudentsT PDF: evaluate directly for smooth curve
1936                        let variance = std * std;
1937                        let df = if variance > 1.0 {
1938                            2.0 * variance / (variance - 1.0)
1939                        } else {
1940                            30.0
1941                        };
1942                        if df > 0.0 {
1943                            // StudentsT is centered at mean, but PDF is typically for standard t (mean=0, std=1)
1944                            // Adjust x to account for data mean and scale
1945                            let x_standardized = if std > 0.0 { (x - mean) / std } else { 0.0 };
1946                            let pdf_standard = students_t_pdf(x_standardized, df);
1947                            // Convert back to data scale: PDF_standard / std
1948                            let pdf = if std > 0.0 { pdf_standard / std } else { 0.0 };
1949                            pdf * bin_width * n as f64
1950                        } else {
1951                            0.0
1952                        }
1953                    }
1954                    DistributionType::PowerLaw => {
1955                        // PowerLaw: use bin-based values from CDF calculations
1956                        // Power law PDF is complex and depends on x_min parameter
1957                        // For log-scale binning, find which bin x belongs to using binary search
1958                        if use_log_scale && x > 0.0 {
1959                            // Binary search to find the correct bin for log-scale boundaries
1960                            let mut left = 0;
1961                            let mut right = num_bins;
1962                            while left < right {
1963                                let mid = (left + right) / 2;
1964                                if x < bin_boundaries[mid] {
1965                                    right = mid;
1966                                } else {
1967                                    left = mid + 1;
1968                                }
1969                            }
1970                            let bin_idx = if left > 0 { left - 1 } else { 0 };
1971                            if bin_idx < num_bins {
1972                                theory_bin_counts[bin_idx]
1973                            } else {
1974                                theory_bin_counts[num_bins - 1]
1975                            }
1976                        } else {
1977                            // Linear binning fallback
1978                            let bin_idx = ((x - hist_min) / bin_width).floor() as usize;
1979                            if bin_idx < num_bins {
1980                                theory_bin_counts[bin_idx]
1981                            } else if bin_idx == num_bins {
1982                                theory_bin_counts[num_bins - 1]
1983                            } else {
1984                                0.0
1985                            }
1986                        }
1987                    }
                    _ => {
                        // Fallback: bin-based approach (CDF-derived theory_bin_counts) for
                        // distributions without a direct PDF implementation above
1992                        let bin_idx = ((x - hist_min) / bin_width).floor() as usize;
1993                        let bin_idx = bin_idx.min(num_bins - 1);
1994                        if bin_idx < theory_bin_counts.len() {
1995                            theory_bin_counts[bin_idx]
1996                        } else {
1997                            0.0
1998                        }
1999                    }
2000                };
2001                let normalized_height = if global_max > 0.0 {
2002                    (theory_count / global_max) * 100.0
2003                } else {
2004                    0.0
2005                };
2006                (x, normalized_height)
2007            })
2008            .collect()
2009    } else {
2010        // Fallback: use bin centers with theory_bin_counts if PDF evaluation fails
2011        let theory_normalized_heights: Vec<f64> = theory_bin_counts
2012            .iter()
2013            .map(|&theory_count| {
2014                if global_max > 0.0 {
2015                    (theory_count / global_max) * 100.0
2016                } else {
2017                    0.0
2018                }
2019            })
2020            .collect();
2021        bin_centers
2022            .iter()
2023            .zip(theory_normalized_heights.iter())
2024            .map(|(&bin_center, &normalized_height)| (bin_center, normalized_height))
2025            .collect()
2026    };
2027
2028    // Create scatter plot dataset for theoretical distribution
2029    // Use Braille marker for dense, continuous appearance
2030    let marker = symbols::Marker::Braille;
2031
2032    let theory_dataset = Dataset::default()
2033        .name("") // Empty name to prevent legend from appearing
2034        .marker(marker)
2035        .graph_type(GraphType::Scatter)
2036        .style(Style::default().fg(theme.get("secondary_chart_series_color")))
2037        .data(&theory_points);
2038
2039    // Create Chart widget with scatter plot overlay
2040    // Configure axes to match BarChart coordinate system exactly:
2041    // - X-axis: range (hist_min to hist_max) - matches bin range
2042    // - Y-axis: normalized height range (0 to 100) - matches bar normalization
2043    // Use same border style as BarChart for coordinate alignment
2044    // Add x-axis labels with more tick marks for better readability
    // Use the same x-axis label format as the Q-Q plot: 3 labels (min, middle, max) with {:.1} formatting
    // Use the histogram range values so the labels align with the bars; for non-negative data
    // hist_min is already >= 0 (strict data range), so it can be used directly
2048    let x_labels = vec![
2049        Span::styled(
2050            format!("{:.1}", hist_min),
2051            Style::default()
2052                .fg(theme.get("text_secondary"))
2053                .add_modifier(Modifier::BOLD),
2054        ),
2055        Span::raw(format!("{:.1}", (hist_min + hist_max) / 2.0)),
2056        Span::styled(
2057            format!("{:.1}", hist_max),
2058            Style::default()
2059                .fg(theme.get("text_secondary"))
2060                .add_modifier(Modifier::BOLD),
2061        ),
2062    ];
2063
2064    let theory_chart = Chart::new(vec![theory_dataset])
2065        .block(
2066            Block::default()
2067                .title("Histogram")
2068                .title_alignment(ratatui::layout::Alignment::Center)
2069                .padding(ratatui::widgets::Padding::new(1, 0, 0, 0)), // Extra top padding to separate title from chart
2070        )
2071        .x_axis(
2072            Axis::default()
2073                .bounds([hist_min, hist_max]) // Use histogram range to align with bars (hist_min already clamped for non-negative data)
2074                .style(Style::default().fg(theme.get("text_secondary")))
2075                .labels(x_labels), // Show x-axis labels with histogram range
2076        )
2077        .y_axis(
2078            Axis::default()
2079                .title("Counts")
2080                .style(Style::default().fg(theme.get("text_secondary")))
2081                .bounds([0.0, 100.0])
2082                .labels({
2083                    // Use dynamic label width calculated earlier
2084                    // y_axis_label_width already includes +1 for padding, so use it directly for formatting
2085                    // This ensures alignment with Q-Q plot using actual label lengths
2086                    let label_width = y_axis_label_width as usize;
2087                    vec![
2088                        // Bottom label: 0 counts (right-aligned to fixed width)
2089                        Span::styled(
2090                            format!("{:>width$}", 0, width = label_width),
2091                            Style::default()
2092                                .fg(theme.get("text_secondary"))
2093                                .add_modifier(Modifier::BOLD),
2094                        ),
2095                        // Middle label: half of max counts (right-aligned)
2096                        Span::styled(
2097                            format!(
2098                                "{:>width$}",
2099                                (global_max / 2.0) as usize,
2100                                width = label_width
2101                            ),
2102                            Style::default().fg(theme.get("text_secondary")),
2103                        ),
2104                        // Top label: max counts (right-aligned)
2105                        Span::styled(
2106                            format!("{:>width$}", global_max as usize, width = label_width),
2107                            Style::default()
2108                                .fg(theme.get("text_secondary"))
2109                                .add_modifier(Modifier::BOLD),
2110                        ),
2111                    ]
2112                }),
2113        )
2114        .hidden_legend_constraints((Constraint::Length(0), Constraint::Length(0))); // Hide legend
2115
2116    // Render Chart overlay to full area (no borders)
2117    // Chart widget will automatically handle its own inner layout for x-axis labels
2118    theory_chart.render(area, buf);
2119}
2120
// Distributions without a dedicated PDF overlay fall back to the CDF-based theory_bin_counts,
// which avoids plateaus and jumps in the theoretical curve.
2123
2124fn render_qq_plot(
2125    dist: &DistributionAnalysis,
2126    dist_type: DistributionType,
2127    area: Rect,
2128    buf: &mut Buffer,
2129    shared_y_axis_label_width: u16,
2130    theme: &Theme,
2131    unified_x_range: Option<(f64, f64)>,
2132) {
2133    // Use Chart widget for Q-Q plot: Data quantiles vs Theoretical quantiles
2134    // Use sorted_sample_values and position-based quantiles (not just 5 percentiles)
2135    let sorted_data = &dist.sorted_sample_values;
2136
    if sorted_data.len() < 3 {
2138        Paragraph::new("Insufficient data for Q-Q plot (need at least 3 points)")
2139            .centered()
2140            .render(area, buf);
2141        return;
2142    }
2143
2144    let n = sorted_data.len();
2145
2146    // Calculate Q-Q plot data points using position-based quantiles
2147    // For each position i, probability p = (i+1)/(n+1), theoretical quantile at p, data quantile = sorted_data[i]
2148    let qq_data: Vec<(f64, f64)> = sorted_data
2149        .iter()
2150        .enumerate()
2151        .map(|(i, &data_value)| {
2152            let position = i + 1; // 1-based position
2153            let probability = (position as f64) / (n as f64 + 1.0);
2154            let theoretical_quantile =
2155                calculate_theoretical_quantile_at_probability(dist, dist_type, probability);
2156            (theoretical_quantile, data_value)
2157        })
2158        .collect();
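    // e.g., with n = 99 points the middle value (position 50) maps to p = 50 / 100 = 0.5,
    // i.e. it is compared against the theoretical median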
2159
2160    // Find data ranges for both axes
2161    // X-axis (Theoretical): calculated from probability percentiles via inverse CDF
2162    // Y-axis (Empirical): raw sorted sample data (preserve all values, even if "impossible")
2163    let theory_min = qq_data
2164        .iter()
2165        .map(|(t, _)| *t)
2166        .fold(f64::INFINITY, f64::min);
2167    let theory_max = qq_data
2168        .iter()
2169        .map(|(t, _)| *t)
2170        .fold(f64::NEG_INFINITY, f64::max);
2171    let theory_range = theory_max - theory_min;
2172
2173    let data_min = qq_data
2174        .iter()
2175        .map(|(_, d)| *d)
2176        .fold(f64::INFINITY, f64::min);
2177    let data_max = qq_data
2178        .iter()
2179        .map(|(_, d)| *d)
2180        .fold(f64::NEG_INFINITY, f64::max);
2181    let data_range = data_max - data_min;
2182
2183    // Only require data_range > 0 (allow plotting even if theoretical range is small/zero)
2184    // This handles cases where distribution doesn't match (e.g., negative data vs strictly positive distribution)
2185    if data_range <= 0.0 {
2186        Paragraph::new("Insufficient data range for Q-Q plot")
2187            .centered()
2188            .render(area, buf);
2189        return;
2190    }
2191
2192    // Use unified X-axis range if provided for visual alignment with histogram
2193    // Otherwise, handle case where all theoretical quantiles are the same (theory_range = 0)
2194    let (theory_min_plot, theory_max_plot) =
2195        if let Some((unified_min, unified_max)) = unified_x_range {
2196            // Use unified range to align with histogram
2197            (unified_min, unified_max)
2198        } else if theory_range <= 0.0 || !theory_min.is_finite() || !theory_max.is_finite() {
2199            // Fallback: use data range (no padding)
2200            (data_min, data_max)
2201        } else {
2202            // Use theoretical range, but clamp to data range to keep charts in sync
2203            (theory_min.max(data_min), theory_max.min(data_max))
2204        };
2205
2206    // Create robust reference line through Q1 and Q3 quartiles
2207    // This works even when domains don't overlap (e.g., negative data vs positive distribution)
2208    let q1_idx = (n as f64 * 0.25).floor() as usize;
2209    let q3_idx = (n as f64 * 0.75).floor() as usize;
2210    let q1_idx = q1_idx.min(n - 1);
2211    let q3_idx = q3_idx.min(n - 1);
2212
    // The indices are clamped to n - 1 above and qq_data has exactly n points, so direct
    // indexing is safe
    let (theory_q1, data_q1) = qq_data[q1_idx];
    let (theory_q3, data_q3) = qq_data[q3_idx];
2223
2224    // Calculate robust reference line through (theory_q1, data_q1) and (theory_q3, data_q3)
2225    // This works even when domains don't overlap (e.g., negative data vs positive distribution)
2226    let theory_diff = theory_q3 - theory_q1;
2227    let reference_line = if theory_diff.abs() > 1e-10 {
2228        // Normal case: calculate slope and extend line to cover plot range (no padding)
2229        let slope = (data_q3 - data_q1) / theory_diff;
2230        let x_start = theory_min_plot;
2231        let x_end = theory_max_plot;
2232        let y_start = slope * (x_start - theory_q1) + data_q1;
2233        let y_end = slope * (x_end - theory_q1) + data_q1;
2234        vec![(x_start, y_start), (x_end, y_end)]
2235    } else {
2236        // Degenerate case: all theoretical quantiles are the same (theory_range ≈ 0)
2237        // Use horizontal line through data median to show the mismatch (no padding)
2238        let y_median = (data_q1 + data_q3) / 2.0;
2239        vec![(theory_min_plot, y_median), (theory_max_plot, y_median)]
2240    };
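    // e.g., if (theory_q1, data_q1) = (1.0, 10.0) and (theory_q3, data_q3) = (3.0, 30.0),
    // the slope is (30 - 10) / (3 - 1) = 10 and the reference line is y = 10 * (x - 1) + 10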
2241
2242    // Create datasets
2243    // Use appropriate marker based on point density
2244    let marker = if qq_data.len() > 100 {
2245        symbols::Marker::Braille // Better for dense scatter plots
2246    } else {
2247        symbols::Marker::Dot
2248    };
2249
2250    let datasets = vec![
2251        // Diagonal reference line
2252        Dataset::default()
2253            .name("") // Empty name to hide from legend
2254            .marker(marker)
2255            .style(Style::default().fg(theme.get("secondary_chart_series_color")))
2256            .graph_type(GraphType::Line)
2257            .data(&reference_line),
2258        // Q-Q plot data points
2259        Dataset::default()
2260            .name("") // Empty name to hide from legend
2261            .marker(marker)
2262            .style(Style::default().fg(theme.get("primary_chart_series_color")))
2263            .graph_type(GraphType::Scatter)
2264            .data(&qq_data),
2265    ];
2266
2267    // Create X-axis labels using plot range
2268    let x_labels = vec![
2269        Span::styled(
2270            format!("{:.1}", theory_min_plot),
2271            Style::default().add_modifier(Modifier::BOLD),
2272        ),
2273        Span::raw(format!("{:.1}", (theory_min_plot + theory_max_plot) / 2.0)),
2274        Span::styled(
2275            format!("{:.1}", theory_max_plot),
2276            Style::default().add_modifier(Modifier::BOLD),
2277        ),
2278    ];
2279
2280    // Use the shared label width calculated in the caller
2281    // This ensures both histogram and Q-Q plot use the same padding for alignment
2282    let label_width = shared_y_axis_label_width as usize;
2283    let y_labels = vec![
2284        // Bottom label: data_min (right-aligned to fixed width)
2285        Span::styled(
2286            format!("{:>width$.1}", data_min, width = label_width),
2287            Style::default().add_modifier(Modifier::BOLD),
2288        ),
2289        // Middle label: average (right-aligned)
2290        Span::raw(format!(
2291            "{:>width$.1}",
2292            (data_min + data_max) / 2.0,
2293            width = label_width
2294        )),
2295        // Top label: data_max (right-aligned)
2296        Span::styled(
2297            format!("{:>width$.1}", data_max, width = label_width),
2298            Style::default().add_modifier(Modifier::BOLD),
2299        ),
2300    ];
2301
2302    let chart = Chart::new(datasets)
2303        .block(
2304            Block::default()
2305                .title("Q-Q Plot")
2306                .title_alignment(ratatui::layout::Alignment::Center)
2307                .padding(ratatui::widgets::Padding::new(1, 0, 0, 0)), // Extra top padding to separate title from chart
2308        )
2309        .x_axis(
2310            Axis::default()
2311                .title("Theoretical Values")
2312                .style(Style::default().fg(theme.get("text_secondary")))
2313                .bounds([theory_min_plot, theory_max_plot])
2314                .labels(x_labels),
2315        )
2316        .y_axis(
2317            Axis::default()
2318                .title("Data Values")
2319                .style(Style::default().fg(theme.get("text_secondary"))) // Axis line should be gray
2320                .bounds([data_min, data_max])
                .labels(y_labels), // Label styling (bold min/max) applied explicitly above
2322        )
2323        .hidden_legend_constraints((Constraint::Length(0), Constraint::Length(0))); // Hide legend
2324
2325    chart.render(area, buf);
2326}
2327
2328fn render_condensed_statistics(
2329    dist: &DistributionAnalysis,
2330    _selected_dist_type: DistributionType,
2331    area: Rect,
2332    buf: &mut Buffer,
2333    theme: &Theme,
2334) {
2335    // Display statistics in single line: SW score, skew, kurtosis, median, mean, std, CV
2336    // Use explicit theme colors so text is always visible (avoids black-on-black for some themes)
2337    let chars = &dist.characteristics;
2338    let label_style = Style::default().fg(theme.get("text_primary"));
2339    let value_style = Style::default().fg(theme.get("text_primary"));
2340
2341    let mut line_parts = Vec::new();
2342
2343    if let (Some(sw_stat), Some(sw_p)) = (chars.shapiro_wilk_stat, chars.shapiro_wilk_pvalue) {
2344        line_parts.push(Span::styled("SW: ", label_style));
2345        line_parts.push(Span::styled(
2346            format!("{:.3} (p={:.3})", sw_stat, sw_p),
2347            value_style,
2348        ));
2349        line_parts.push(Span::styled(" ", value_style));
2350    }
2351
2352    line_parts.push(Span::styled("Skew: ", label_style));
2353    line_parts.push(Span::styled(format!("{:.2}", chars.skewness), value_style));
2354    line_parts.push(Span::styled(" ", value_style));
2355
2356    line_parts.push(Span::styled("Kurt: ", label_style));
2357    line_parts.push(Span::styled(format!("{:.2}", chars.kurtosis), value_style));
2358    line_parts.push(Span::styled(" ", value_style));
2359
2360    line_parts.push(Span::styled("Median: ", label_style));
2361    line_parts.push(Span::styled(
2362        format!("{:.2}", dist.percentiles.p50),
2363        value_style,
2364    ));
2365    line_parts.push(Span::styled(" ", value_style));
2366
2367    line_parts.push(Span::styled("Mean: ", label_style));
2368    line_parts.push(Span::styled(format!("{:.2}", chars.mean), value_style));
2369    line_parts.push(Span::styled(" ", value_style));
2370
2371    line_parts.push(Span::styled("Std: ", label_style));
2372    line_parts.push(Span::styled(format!("{:.2}", chars.std_dev), value_style));
2373    line_parts.push(Span::styled(" ", value_style));
2374
2375    line_parts.push(Span::styled("CV: ", label_style));
2376    line_parts.push(Span::styled(
2377        format!("{:.3}", chars.coefficient_of_variation),
2378        value_style,
2379    ));
2380
2381    let line = Line::from(line_parts);
2382    let lines = vec![line];
2383
2384    Paragraph::new(lines).render(area, buf);
2385}
2386
2387// Calculate theoretical quantile at any probability (for Q-Q plots)
2388pub fn calculate_theoretical_quantile_at_probability(
2389    dist: &DistributionAnalysis,
2390    dist_type: DistributionType,
2391    probability: f64,
2392) -> f64 {
2393    let chars = &dist.characteristics;
2394    let p = probability.clamp(0.0, 1.0); // Clamp to [0, 1]
2395
2396    match dist_type {
2397        DistributionType::Normal => {
2398            let z = approximate_normal_quantile(p);
2399            chars.mean + chars.std_dev * z
2400        }
2401        DistributionType::LogNormal => {
2402            let z = approximate_normal_quantile(p);
2403            // Convert from mean (m) and std dev (s) on original scale to lognormal parameters (μ, σ)
2404            // Where X ~ Lognormal(μ, σ²) means ln(X) ~ Normal(μ, σ)
2405            // Formulas: σ = sqrt(ln(1 + s²/m²)), μ = ln(m) - σ²/2
2406            // Quantile: q(p) = exp(μ + σ*z)
2407            // Even if data doesn't match (e.g., negative values), still calculate quantiles over [0, ∞)
2408            let m = chars.mean;
2409            let s = chars.std_dev;
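            // e.g., m = 2, s = 1: sigma = sqrt(ln(1 + 1/4)) ≈ 0.472, mu = ln(2) - 0.472²/2 ≈ 0.582,
            // so the median quantile (z = 0) is exp(0.582) ≈ 1.79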
2410            if m > 0.0 && s >= 0.0 {
2411                let variance = s * s;
2412                let sigma = (1.0 + variance / (m * m)).ln().sqrt();
2413                let mu = m.ln() - (sigma * sigma) / 2.0;
2414                (mu + sigma * z).exp()
2415            } else {
2416                // Data doesn't match LogNormal (e.g., negative mean): use default parameters
2417                // Default: mu=0, sigma=1 gives mean≈1.65, which provides a reasonable range
2418                (z).exp()
2419            }
2420        }
2421        DistributionType::Uniform => {
2422            // Estimate min/max from mean and std: for uniform, std = (max-min) / sqrt(12)
2423            let range = chars.std_dev * (12.0_f64).sqrt();
2424            let min_est = chars.mean - range / 2.0;
2425            let max_est = chars.mean + range / 2.0;
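            // e.g., mean = 5 and std = 10/sqrt(12) ≈ 2.89 give range ≈ 10, so min ≈ 0,
            // max ≈ 10, and q(0.25) = 2.5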
2426            min_est + (max_est - min_est) * p
2427        }
2428        DistributionType::Exponential => {
2429            // Exponential quantile: q(p) = -ln(1-p) / lambda, where lambda = 1/mean
2430            // Even if data doesn't match (e.g., negative values), still calculate quantiles over [0, ∞)
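            // e.g., mean = 2 gives lambda = 0.5 and a median of q(0.5) = -2 * ln(0.5) ≈ 1.386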
2431            if chars.mean > 0.0 {
2432                -chars.mean * (1.0 - p).ln()
2433            } else {
2434                // Data doesn't match Exponential (e.g., negative mean): use default lambda=1
2435                // This ensures we still get a range of quantiles
2436                -(1.0 - p).ln()
2437            }
2438        }
2439        DistributionType::Beta => {
2440            // Beta quantile: use approximation
2441            // Estimate parameters from mean and variance
2442            let mean = chars.mean;
2443            let variance = chars.std_dev * chars.std_dev;
2444            if mean > 0.0 && mean < 1.0 && variance > 0.0 {
2445                let max_var = mean * (1.0 - mean);
2446                if variance < max_var {
2447                    // Estimate alpha and beta using method of moments
2448                    let sum = mean * (1.0 - mean) / variance - 1.0;
2449                    let alpha = mean * sum;
2450                    let beta = (1.0 - mean) * sum;
2451                    if alpha > 0.0 && beta > 0.0 && alpha + beta > 50.0 {
2452                        // Normal approximation
2453                        let normal_mean = alpha / (alpha + beta);
2454                        let normal_std = ((alpha * beta)
2455                            / ((alpha + beta).powi(2) * (alpha + beta + 1.0)))
2456                            .sqrt();
2457                        let z = approximate_normal_quantile(p);
2458                        normal_mean + normal_std * z
2459                    } else {
2460                        // Use simple linear interpolation across [0, 1] range
2461                        // Clamp to [0, 1] for beta distribution
2462                        p.clamp(0.0, 1.0)
2463                    }
2464                } else {
2465                    // Use linear interpolation across [0, 1] range
2466                    p.clamp(0.0, 1.0)
2467                }
2468            } else {
2469                // Fallback: use empirical percentile interpolation
2470                interpolate_empirical_quantile(dist, p)
2471            }
2472        }
2473        DistributionType::Gamma => {
2474            // Gamma quantile: estimate parameters and use proper quantile function
2475            // Even if data doesn't match (e.g., negative values), still calculate quantiles
2476            // over the distribution's natural domain [0, ∞)
2477            let mean = chars.mean;
2478            let variance = chars.std_dev * chars.std_dev;
2479            if mean > 0.0 && variance > 0.0 {
2480                let shape = (mean * mean) / variance;
2481                let scale = variance / mean;
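                // e.g., mean = 4 and variance = 8 give shape = 16/8 = 2 and scale = 8/4 = 2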
2482                // Check for edge cases: very small shape or very large scale can cause numerical issues
2483                // Also check if parameters are reasonable (shape >= 0.01, scale < 1e6)
2484                if shape > 0.01
2485                    && scale > 0.0
2486                    && scale < 1e6
2487                    && shape.is_finite()
2488                    && scale.is_finite()
2489                {
2490                    gamma_quantile(p, shape, scale)
2491                } else {
2492                    // Invalid or extreme parameters: use default Gamma distribution to still show a range
2493                    // Use shape=1 (exponential) with reasonable scale
2494                    let default_scale = if mean > 0.0 && mean < 1e6 {
2495                        mean.max(0.1) // Ensure scale is reasonable
2496                    } else {
2497                        1.0
2498                    };
2499                    gamma_quantile(p, 1.0, default_scale)
2500                }
2501            } else {
2502                // Data doesn't match Gamma (e.g., negative mean): use default parameters
2503                // This ensures we still get a range of quantiles over [0, ∞)
2504                let default_scale = 1.0;
2505                gamma_quantile(p, 1.0, default_scale)
2506            }
2507        }
        DistributionType::ChiSquared => {
            // Chi-squared quantile: special case of gamma with shape = df/2, scale = 2
            // Estimate df from mean (mean = df for chi-squared)
            // Even if data doesn't match (e.g., negative values), still calculate quantiles over [0, ∞)
            let df = chars.mean;
            if df > 0.0 {
                if df > 30.0 {
                    // Normal approximation
                    let normal_mean = df;
                    let normal_std = (2.0 * df).sqrt();
                    let z = approximate_normal_quantile(p);
                    (normal_mean + normal_std * z).max(0.0)
                } else {
                    // Use gamma quantile with shape = df/2, scale = 2
                    gamma_quantile(p, df / 2.0, 2.0)
                }
            } else {
                // Data doesn't match ChiSquared (e.g., negative mean): use default df=1
                gamma_quantile(p, 0.5, 2.0)
            }
        }
        DistributionType::StudentsT => {
            // Student's t quantile: approximate with the normal quantile scaled by the
            // sample mean and standard deviation. The degrees of freedom could be
            // estimated from the variance (variance = df / (df - 2), so
            // df = 2 * variance / (variance - 1)), but both the large- and small-df
            // cases use the same normal approximation, so the estimate is not needed.
            let z = approximate_normal_quantile(p);
            chars.mean + chars.std_dev * z
        }
        DistributionType::Poisson => {
            // Poisson quantile: normal approximation with mean lambda and std dev sqrt(lambda).
            // The approximation is accurate for large lambda and still yields a usable
            // spread of quantiles for small lambda, so it is used in both cases.
            // Even if data doesn't match (e.g., negative values), still calculate quantiles over [0, ∞)
            let lambda = chars.mean;
            if lambda > 0.0 {
                let z = approximate_normal_quantile(p);
                (lambda + z * lambda.sqrt()).max(0.0)
            } else {
                // Data doesn't match Poisson (e.g., negative mean): use default lambda=10
                // so we still get a range of quantiles
                let default_lambda: f64 = 10.0;
                let z = approximate_normal_quantile(p);
                (default_lambda + z * default_lambda.sqrt()).max(0.0)
            }
        }
        DistributionType::Bernoulli => {
            // Bernoulli quantile: the exact quantile function is a step
            // (0 if p < 1 - p_param, 1 otherwise), but a continuous approximation
            // gives the Q-Q plot a more useful range of values
            let mean = chars.mean; // mean = p_param for Bernoulli
            if mean <= 0.0 || mean >= 1.0 {
                // Degenerate cases (all 0s or all 1s): fall back to empirical percentiles
                interpolate_empirical_quantile(dist, p)
            } else {
                let threshold = 1.0 - mean;
                if p < threshold {
                    0.0
                } else if p > mean {
                    1.0
                } else if mean > threshold {
                    // Interpolate across the middle range [threshold, mean] (reachable
                    // only when mean > 0.5) for a smoother Q-Q plot
                    (p - threshold) / (mean - threshold)
                } else {
                    // mean == 0.5 exactly: the middle range collapses to a single point,
                    // so return it directly instead of dividing by zero
                    0.5
                }
            }
        }
        DistributionType::Binomial => {
            // Binomial quantile: use normal approximation
            // Even if data doesn't match, still calculate quantiles to show a range
            let mean = chars.mean;
            let variance = chars.std_dev * chars.std_dev;
            if variance > 0.0 {
                let z = approximate_normal_quantile(p);
                (mean + z * variance.sqrt()).max(0.0)
            } else {
                // No variance: use default parameters to still show a range
                // Estimate n from mean (assuming p=0.5 for default)
                let default_n = (mean * 2.0).max(10.0);
                let default_p = 0.5;
                let default_mean = default_n * default_p;
                let default_variance = default_n * default_p * (1.0 - default_p);
                let z = approximate_normal_quantile(p);
                (default_mean + z * default_variance.sqrt()).max(0.0)
            }
        }
        DistributionType::Geometric => {
            // Geometric quantile: use proper quantile function
            let mean = chars.mean; // mean = (1-p)/p for geometric
            if mean > 0.0 {
                let p_param = 1.0 / (mean + 1.0);
                if p_param > 0.0 && p_param < 1.0 {
                    geometric_quantile(p, p_param)
                } else {
                    // Fallback: use empirical percentile interpolation
                    interpolate_empirical_quantile(dist, p)
                }
            } else {
                // Fallback: use empirical percentile interpolation
                interpolate_empirical_quantile(dist, p)
            }
        }
        DistributionType::Weibull => {
            // Weibull quantile: inverting the CDF F(x) = 1 - exp(-(x/scale)^shape)
            // gives q(p) = scale * (-ln(1-p))^(1/shape)
            // Estimate parameters from data characteristics
            // Even if data doesn't match (e.g., negative values), still calculate quantiles over [0, ∞)
            let sorted_data = &dist.sorted_sample_values;
            let mean = chars.mean;
            let variance = chars.std_dev * chars.std_dev;

            let (shape_est, scale_est) = if !sorted_data.is_empty()
                && sorted_data[0] > 0.0
                && mean > 0.0
                && variance > 0.0
            {
                // Estimate shape and scale from the data
                // Rough rule of thumb: the shape grows as the coefficient of variation (CV)
                // shrinks, so approximate shape by 1/CV when the CV is below 1
                let cv = chars.std_dev / mean;
                let shape = if cv < 1.0 {
                    // Approximation for the shape parameter
                    1.0 / cv
                } else {
                    1.0
                };
                // Scale from the mean: mean = scale * Γ(1 + 1/shape),
                // with Γ(1 + 1/shape) roughly approximated by 1 + 1/shape
                let gamma_term = 1.0 + 1.0 / shape;
                let scale = mean / gamma_term;
                if scale > 0.0 && shape > 0.0 {
                    (shape, scale)
                } else {
                    // Invalid parameters: use defaults
                    (1.0, 1.0)
                }
            } else {
                // Data doesn't match Weibull (e.g., negative values or invalid parameters): use defaults
                // Default: shape=1 (exponential), scale=1
                (1.0, 1.0)
            };

            scale_est * (-(1.0 - p).ln()).powf(1.0 / shape_est)
        }
        DistributionType::PowerLaw | DistributionType::Unknown => {
            // Fallback: use empirical quantiles from percentiles
            interpolate_empirical_quantile(dist, p)
        }
    }
}
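
// A minimal sanity-check sketch for the method-of-moments algebra used in the Beta
// and Gamma arms above. It runs on hand-picked numbers only and does not construct a
// `DistributionAnalysis`, so the module and test names here are illustrative
// assumptions rather than part of the analysis pipeline.
#[cfg(test)]
mod moment_matching_sketch {
    #[test]
    fn beta_moments_recover_alpha_and_beta() {
        // Beta(alpha = 2, beta = 6): mean = 2/8 = 0.25, variance = (2 * 6) / (8^2 * 9) = 12/576
        let mean: f64 = 0.25;
        let variance: f64 = 12.0 / 576.0;
        let sum = mean * (1.0 - mean) / variance - 1.0; // alpha + beta
        assert!((mean * sum - 2.0).abs() < 1e-9); // alpha
        assert!(((1.0 - mean) * sum - 6.0).abs() < 1e-9); // beta
    }

    #[test]
    fn gamma_moments_recover_shape_and_scale() {
        // Gamma(shape = 3, scale = 2): mean = 6, variance = 12
        let mean: f64 = 6.0;
        let variance: f64 = 12.0;
        assert!(((mean * mean) / variance - 3.0).abs() < 1e-9); // shape
        assert!((variance / mean - 2.0).abs() < 1e-9); // scale
    }
}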

// Helper function to interpolate empirical quantiles from known percentiles.
// Values of p at or below 0.05 are clamped to p5 and values above 0.95 to p95;
// in between, each percentile band is interpolated linearly.
fn interpolate_empirical_quantile(dist: &DistributionAnalysis, p: f64) -> f64 {
    // Interpolate between known percentiles
    if p <= 0.05 {
        dist.percentiles.p5
    } else if p <= 0.25 {
        dist.percentiles.p5 + (dist.percentiles.p25 - dist.percentiles.p5) * ((p - 0.05) / 0.20)
    } else if p <= 0.50 {
        dist.percentiles.p25 + (dist.percentiles.p50 - dist.percentiles.p25) * ((p - 0.25) / 0.25)
    } else if p <= 0.75 {
        dist.percentiles.p50 + (dist.percentiles.p75 - dist.percentiles.p50) * ((p - 0.50) / 0.25)
    } else if p <= 0.95 {
        dist.percentiles.p75 + (dist.percentiles.p95 - dist.percentiles.p75) * ((p - 0.75) / 0.20)
    } else {
        dist.percentiles.p95
    }
}
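
// A minimal arithmetic sketch of the band interpolation above, written against
// hypothetical percentile values rather than a real `DistributionAnalysis`; it
// mirrors the p25-to-p50 branch of `interpolate_empirical_quantile`.
#[cfg(test)]
mod empirical_interpolation_sketch {
    #[test]
    fn band_midpoint_lands_halfway_between_its_percentiles() {
        let (p25, p50) = (10.0_f64, 20.0_f64); // hypothetical percentiles
        let p = 0.375; // halfway between 0.25 and 0.50
        let interpolated = p25 + (p50 - p25) * ((p - 0.25) / 0.25);
        assert!((interpolated - 15.0).abs() < 1e-9);
    }
}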

fn approximate_normal_quantile(p: f64) -> f64 {
    // Approximation of the inverse CDF (quantile function) of the standard normal
    // distribution for 0 < p < 1, using the rational approximation from
    // Abramowitz & Stegun, formula 26.2.23 (absolute error below roughly 4.5e-4)
    if p < 0.5 {
        // Symmetry: z(p) = -z(1 - p)
        -approximate_normal_quantile(1.0 - p)
    } else {
        // Upper half: t = sqrt(-2 * ln(1 - p))
        let t = (-2.0 * (1.0 - p).ln()).sqrt();
        t - (2.515517 + 0.802853 * t + 0.010328 * t * t)
            / (1.0 + 1.432788 * t + 0.189269 * t * t + 0.001308 * t * t * t)
    }
}
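
// A minimal test sketch for the inverse-normal approximation above: it checks the
// symmetry identity and two reference values (Phi^-1(0.5) = 0, Phi^-1(0.975) ≈ 1.96)
// within the roughly 4.5e-4 accuracy of the rational approximation. The module and
// test names are illustrative; nothing beyond the standard libtest harness is assumed.
#[cfg(test)]
mod normal_quantile_sketch {
    use super::approximate_normal_quantile;

    #[test]
    fn symmetric_about_the_median() {
        // z(p) = -z(1 - p) by construction of the lower branch
        let z_low = approximate_normal_quantile(0.025);
        let z_high = approximate_normal_quantile(0.975);
        assert!((z_low + z_high).abs() < 1e-9);
    }

    #[test]
    fn close_to_reference_values() {
        assert!(approximate_normal_quantile(0.5).abs() < 1e-3);
        assert!((approximate_normal_quantile(0.975) - 1.959964).abs() < 1e-3);
    }
}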