1use std::collections::HashMap;
4use std::path::Path;
5use std::sync::Arc;
6
7use polars::prelude::*;
8use polars_parquet::parquet::metadata::FileMetadata;
9use polars_parquet::parquet::read::read_metadata;
10
/// Shared (`Arc`) handle to a Parquet file's [`FileMetadata`], so the parsed
/// metadata can be passed around without copying it.
pub type ParquetMetadataCache = Arc<FileMetadata>;
13use ratatui::buffer::Buffer;
14use ratatui::layout::{Constraint, Direction, Layout, Rect};
15use ratatui::prelude::Stylize;
16use ratatui::style::{Modifier, Style};
17use ratatui::text::{Line, Span};
18use ratatui::widgets::{
19 Block, BorderType, Borders, Gauge, Padding, Paragraph, Row, StatefulWidget, Table, Tabs, Widget,
20};
21
22use super::datatable::DataTableState;
23use crate::export_modal::ExportFormat;
24
/// Formats a byte count using binary units.
///
/// Values of at least 1 KiB are shown with one decimal place in the largest
/// unit (KiB, MiB or GiB) that does not exceed the value; smaller values are
/// shown as a whole number of bytes, e.g. `"500 B"`.
pub fn format_bytes(n: u64) -> String {
    // Ordered from largest to smallest so the first match is the best unit.
    const UNITS: [(u64, &str); 3] = [(1 << 30, "GiB"), (1 << 20, "MiB"), (1 << 10, "KiB")];

    match UNITS.iter().find(|(threshold, _)| n >= *threshold) {
        Some((threshold, suffix)) => {
            format!("{:.1} {}", n as f64 / *threshold as f64, suffix)
        }
        None => format!("{} B", n),
    }
}
40
/// Tabs shown in the info modal.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum InfoTab {
    /// Column names and types (the default tab).
    #[default]
    Schema,
    /// File and buffer statistics.
    Resources,
    /// Partition columns; only reachable when partitions exist.
    Partitions,
}

impl InfoTab {
    /// Returns the tab after `self`, wrapping around to `Schema`.
    ///
    /// `Partitions` is skipped when `has_partitions` is false.
    pub fn next(self, has_partitions: bool) -> Self {
        match (self, has_partitions) {
            (InfoTab::Schema, _) => InfoTab::Resources,
            (InfoTab::Resources, true) => InfoTab::Partitions,
            (InfoTab::Resources, false) | (InfoTab::Partitions, _) => InfoTab::Schema,
        }
    }

    /// Returns the tab before `self`, wrapping around from `Schema`.
    ///
    /// `Partitions` is skipped when `has_partitions` is false.
    pub fn prev(self, has_partitions: bool) -> Self {
        match (self, has_partitions) {
            (InfoTab::Schema, true) => InfoTab::Partitions,
            (InfoTab::Schema, false) | (InfoTab::Partitions, _) => InfoTab::Resources,
            (InfoTab::Resources, _) => InfoTab::Schema,
        }
    }

    /// Returns the zero-based position of this tab in the tab bar.
    ///
    /// `Partitions` maps to index 0 when `has_partitions` is false, because
    /// the tab bar then has no third entry.
    pub fn index(self, has_partitions: bool) -> usize {
        match (self, has_partitions) {
            (InfoTab::Schema, _) | (InfoTab::Partitions, false) => 0,
            (InfoTab::Resources, _) => 1,
            (InfoTab::Partitions, true) => 2,
        }
    }
}
92
/// Which part of the info modal currently has keyboard focus.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum InfoFocus {
    /// The tab bar at the top of the modal (the default).
    #[default]
    TabBar,
    /// The content area below the tab bar.
    Body,
}
99
100#[derive(Default)]
102pub struct InfoModal {
103 pub active: bool,
104 pub active_tab: InfoTab,
105 pub focus: InfoFocus,
106 pub schema_selected_index: usize,
107 pub schema_scroll_offset: usize,
108 pub schema_table_state: ratatui::widgets::TableState,
109 pub schema_visible_height: usize,
111}
112
113impl InfoModal {
114 pub fn new() -> Self {
115 Self::default()
116 }
117
118 pub fn open(&mut self) {
119 self.active = true;
120 self.active_tab = InfoTab::Schema;
121 self.focus = InfoFocus::Body;
122 self.schema_selected_index = 0;
123 self.schema_scroll_offset = 0;
124 self.schema_table_state.select(Some(0));
125 }
126
127 pub fn close(&mut self) {
128 self.active = false;
129 }
130
131 pub fn next_focus(&mut self) {
132 self.focus = match self.focus {
133 InfoFocus::TabBar => InfoFocus::Body,
134 InfoFocus::Body => InfoFocus::TabBar,
135 };
136 }
137
138 pub fn prev_focus(&mut self) {
139 self.focus = match self.focus {
140 InfoFocus::TabBar => InfoFocus::Body,
141 InfoFocus::Body => InfoFocus::TabBar,
142 };
143 }
144
145 pub fn switch_tab(&mut self, has_partitions: bool) {
147 self.active_tab = self.active_tab.next(has_partitions);
148 if self.active_tab == InfoTab::Schema {
149 self.schema_selected_index = 0;
150 self.schema_scroll_offset = 0;
151 self.schema_table_state.select(Some(0));
152 } else {
153 self.focus = InfoFocus::TabBar;
154 }
155 }
156
157 pub fn switch_tab_prev(&mut self, has_partitions: bool) {
159 self.active_tab = self.active_tab.prev(has_partitions);
160 if self.active_tab == InfoTab::Schema {
161 self.schema_selected_index = 0;
162 self.schema_scroll_offset = 0;
163 self.schema_table_state.select(Some(0));
164 } else {
165 self.focus = InfoFocus::TabBar;
166 }
167 }
168
169 pub fn schema_table_down(&mut self, total_rows: usize, visible_height: usize) -> bool {
172 if total_rows == 0 {
173 return false;
174 }
175 let max_idx = total_rows.saturating_sub(1);
176 if self.schema_selected_index >= max_idx {
177 return false;
178 }
179 self.schema_selected_index += 1;
180 let visible_end = self.schema_scroll_offset + visible_height;
181 if visible_height > 0 && self.schema_selected_index >= visible_end {
182 self.schema_scroll_offset = self.schema_selected_index + 1 - visible_height;
183 }
184 let local = self
185 .schema_selected_index
186 .saturating_sub(self.schema_scroll_offset);
187 self.schema_table_state.select(Some(local));
188 true
189 }
190
191 pub fn schema_table_up(&mut self, total_rows: usize, _visible_height: usize) -> bool {
192 if total_rows == 0 || self.schema_selected_index == 0 {
193 return false;
194 }
195 self.schema_selected_index -= 1;
196 if self.schema_selected_index < self.schema_scroll_offset {
197 self.schema_scroll_offset = self.schema_selected_index;
198 }
199 let local = self
200 .schema_selected_index
201 .saturating_sub(self.schema_scroll_offset);
202 self.schema_table_state.select(Some(local));
203 true
204 }
205
206 pub fn sync_schema_table_state(&mut self, total_rows: usize, visible_height: usize) {
208 if total_rows == 0 {
209 self.schema_table_state.select(None);
210 return;
211 }
212 let max_idx = total_rows.saturating_sub(1);
213 self.schema_selected_index = self.schema_selected_index.min(max_idx);
214 if self.schema_scroll_offset + visible_height <= self.schema_selected_index
215 && visible_height > 0
216 {
217 self.schema_scroll_offset = self.schema_selected_index + 1 - visible_height;
218 }
219 if self.schema_selected_index < self.schema_scroll_offset {
220 self.schema_scroll_offset = self.schema_selected_index;
221 }
222 let local = self
223 .schema_selected_index
224 .saturating_sub(self.schema_scroll_offset);
225 self.schema_table_state.select(Some(local));
226 }
227}
228
229pub struct InfoContext<'a> {
231 pub path: Option<&'a Path>,
232 pub format: Option<ExportFormat>,
233 pub parquet_metadata: Option<&'a ParquetMetadataCache>,
234}
235
236impl<'a> InfoContext<'a> {
237 pub fn schema_source(&self) -> &'static str {
238 match self.format {
239 Some(ExportFormat::Parquet) | Some(ExportFormat::Ipc) | Some(ExportFormat::Avro) => {
240 "Known"
241 }
242 _ => "Inferred",
243 }
244 }
245
246 pub fn file_size_bytes(&self) -> Option<u64> {
247 self.path
248 .and_then(|p| std::fs::metadata(p).ok())
249 .map(|m| m.len())
250 }
251}
252
253fn parquet_column_compression(
255 meta: &FileMetadata,
256 polars_schema: &Schema,
257) -> HashMap<String, (String, f64)> {
258 let mut by_name: HashMap<String, (u64, u64)> = HashMap::new();
259 let mut codec_by_name: HashMap<String, String> = HashMap::new();
260 for rg in &meta.row_groups {
261 for cc in rg.parquet_columns() {
262 let name = cc
263 .descriptor()
264 .path_in_schema
265 .first()
266 .map(|s| s.as_ref())
267 .unwrap_or("");
268 let comp = cc.compressed_size() as u64;
269 let uncomp = cc.uncompressed_size() as u64;
270 let codec = format!("{:?}", cc.compression()).to_lowercase();
271 let e = by_name.entry(name.to_string()).or_insert((0, 0));
272 e.0 = e.0.saturating_add(comp);
273 e.1 = e.1.saturating_add(uncomp);
274 codec_by_name.insert(name.to_string(), codec);
275 }
276 }
277 let mut out = HashMap::new();
278 for (name, (comp, uncomp)) in by_name {
279 if !polars_schema.contains(&name) {
280 continue;
281 }
282 let codec = codec_by_name
283 .get(&name)
284 .cloned()
285 .unwrap_or_else(|| "—".to_string());
286 if comp > 0 && uncomp > 0 {
287 let ratio = uncomp as f64 / comp as f64;
288 out.insert(name, (codec, ratio));
289 }
290 }
291 out
292}
293
294fn parquet_overall_sizes(meta: &FileMetadata) -> (u64, u64) {
296 let mut comp: u64 = 0;
297 let mut uncomp: u64 = 0;
298 for rg in &meta.row_groups {
299 comp = comp.saturating_add(rg.compressed_size() as u64);
300 uncomp = uncomp.saturating_add(rg.total_byte_size() as u64);
301 }
302 (comp, uncomp)
303}
304
305pub struct DataTableInfo<'a> {
306 pub state: &'a DataTableState,
307 pub ctx: InfoContext<'a>,
308 pub modal: &'a mut InfoModal,
309 pub border_color: ratatui::style::Color,
310 pub active_color: ratatui::style::Color,
311 pub primary_color: ratatui::style::Color,
312}
313
314impl<'a> DataTableInfo<'a> {
315 pub fn new(
316 state: &'a DataTableState,
317 ctx: InfoContext<'a>,
318 modal: &'a mut InfoModal,
319 border_color: ratatui::style::Color,
320 active_color: ratatui::style::Color,
321 primary_color: ratatui::style::Color,
322 ) -> Self {
323 Self {
324 state,
325 ctx,
326 modal,
327 border_color,
328 active_color,
329 primary_color,
330 }
331 }
332
333 fn render_schema_tab(&mut self, area: Rect, buf: &mut Buffer) {
334 let summary = self.render_schema_summary(area, buf);
335 let rest = Rect {
336 y: area.y + summary,
337 height: area.height.saturating_sub(summary),
338 ..area
339 };
340 if rest.height == 0 {
341 return;
342 }
343 self.render_schema_table(rest, buf);
344 }
345
346 fn render_schema_summary(&self, area: Rect, buf: &mut Buffer) -> u16 {
347 let ncols = self.state.schema.len();
348 let nrows = self.state.num_rows;
349 let mut lines = vec![];
350 lines.push(format!(
351 "Rows (total): {} · Columns: {}",
352 format_int(nrows),
353 ncols
354 ));
355 let by_type = columns_by_type(self.state.schema.as_ref());
356 if !by_type.is_empty() {
357 lines.push(by_type);
358 }
359 for (i, s) in lines.iter().enumerate() {
360 Paragraph::new(s.as_str()).render(
361 Rect {
362 x: area.x,
363 y: area.y + i as u16,
364 width: area.width,
365 height: 1,
366 },
367 buf,
368 );
369 }
370 lines.len() as u16
371 }
372
373 fn render_schema_table(&mut self, area: Rect, buf: &mut Buffer) {
374 let src = self.ctx.schema_source();
375 let compression = self
376 .ctx
377 .parquet_metadata
378 .map(|m| parquet_column_compression(m.as_ref(), self.state.schema.as_ref()));
379 let has_comp = compression.as_ref().is_some_and(|c| !c.is_empty());
380 let header = if has_comp {
381 Row::new(vec!["Column", "Type", "Source", "Compression"]).bold()
382 } else {
383 Row::new(vec!["Column", "Type", "Source"]).bold()
384 };
385
386 let total_rows = self.state.schema.len();
387 let body_focused = self.modal.focus == InfoFocus::Body;
388 let border_style = if body_focused {
389 Style::default().fg(self.active_color)
390 } else {
391 Style::default().fg(self.border_color)
392 };
393 let block = Block::default()
394 .title(Line::from(format!("Schema: {}", src)).bold())
395 .padding(Padding::new(1, 1, 1, 1))
396 .border_style(border_style);
397 let inner = block.inner(area);
398 let visible_height = inner.height as usize;
399 block.render(area, buf);
400
401 let data_height = visible_height.saturating_sub(1);
402 self.modal.schema_visible_height = data_height;
403 self.modal.sync_schema_table_state(total_rows, data_height);
404
405 let offset = self.modal.schema_scroll_offset;
406 let take = visible_height
407 .saturating_sub(1)
408 .min(total_rows.saturating_sub(offset));
409 let mut rows = vec![];
410 for (idx, (name, dtype)) in self.state.schema.iter().enumerate() {
411 if idx < offset {
412 continue;
413 }
414 if idx >= offset + take {
415 break;
416 }
417 let name_str: &str = name.as_ref();
418 let comp_str = compression
419 .as_ref()
420 .and_then(|c| c.get(name_str))
421 .map(|(codec, ratio)| format!("{} {:.1}×", codec, ratio))
422 .unwrap_or_else(|| "—".to_string());
423 let row = if has_comp {
424 Row::new(vec![
425 name.to_string(),
426 dtype.to_string(),
427 src.to_string(),
428 comp_str,
429 ])
430 } else {
431 Row::new(vec![name.to_string(), dtype.to_string(), src.to_string()])
432 };
433 rows.push(row);
434 }
435
436 let widths: Vec<Constraint> = if has_comp {
437 vec![
438 Constraint::Percentage(25),
439 Constraint::Percentage(35),
440 Constraint::Percentage(15),
441 Constraint::Percentage(25),
442 ]
443 } else {
444 vec![
445 Constraint::Percentage(40),
446 Constraint::Percentage(40),
447 Constraint::Percentage(20),
448 ]
449 };
450 let table = Table::new(rows, widths)
451 .header(header)
452 .column_spacing(1)
453 .row_highlight_style(Style::default().add_modifier(Modifier::REVERSED))
454 .highlight_symbol(">> ");
455 StatefulWidget::render(table, inner, buf, &mut self.modal.schema_table_state);
456 }
457
458 fn render_resources_tab(&self, area: Rect, buf: &mut Buffer) {
459 const LABEL_WIDTH: u16 = 16;
460 let label_constraint = Constraint::Length(LABEL_WIDTH);
461 let value_constraint = Constraint::Min(1);
462 let mut y = area.y;
463 let h = area.height;
464 let w = area.width;
465
466 fn label_value_row(label: &str, value: &str, area: Rect, buf: &mut Buffer, label_w: u16) {
467 let chunks = Layout::default()
468 .direction(Direction::Horizontal)
469 .constraints([Constraint::Length(label_w), Constraint::Min(1)])
470 .split(area);
471 Paragraph::new(label).render(chunks[0], buf);
472 Paragraph::new(value).render(chunks[1], buf);
473 }
474
475 if y >= area.y + h {
476 return;
477 }
478 let file_size = self.ctx.file_size_bytes().map(format_bytes);
479 let file_size_str = file_size.as_deref().unwrap_or("—");
480 label_value_row(
481 "File size:",
482 file_size_str,
483 Rect {
484 y,
485 width: w,
486 height: 1,
487 ..area
488 },
489 buf,
490 LABEL_WIDTH,
491 );
492 y += 1;
493
494 if y >= area.y + h {
495 return;
496 }
497 let fmt = self.ctx.format.map(|f| f.as_str()).unwrap_or("—");
498 label_value_row(
499 "Format:",
500 fmt,
501 Rect {
502 y,
503 width: w,
504 height: 1,
505 ..area
506 },
507 buf,
508 LABEL_WIDTH,
509 );
510 y += 1;
511
512 if y >= area.y + h {
513 return;
514 }
515 let buf_rows = self.state.buffered_rows();
516 let max_rows = self.state.max_buffered_rows();
517 let row_area = Rect {
518 y,
519 width: w,
520 height: 1,
521 ..area
522 };
523 let row_chunks = Layout::default()
524 .direction(Direction::Horizontal)
525 .constraints([label_constraint, value_constraint])
526 .split(row_area);
527 Paragraph::new("Buffer (Rows):").render(row_chunks[0], buf);
528 if max_rows > 0 {
529 let ratio = (buf_rows as f64 / max_rows as f64).min(1.0);
530 let label = format!("{} / {}", format_int(buf_rows), format_int(max_rows));
531 Gauge::default()
532 .gauge_style(Style::default().fg(self.primary_color))
533 .ratio(ratio)
534 .label(Span::raw(label))
535 .render(row_chunks[1], buf);
536 } else {
537 Paragraph::new(format_int(buf_rows)).render(row_chunks[1], buf);
538 }
539 y += 1;
540
541 if y >= area.y + h {
542 return;
543 }
544 let buf_mb = self
545 .state
546 .buffered_memory_bytes()
547 .map(|b| b / (1024 * 1024));
548 let max_mb = self.state.max_buffered_mb();
549 let mb_area = Rect {
550 y,
551 width: w,
552 height: 1,
553 ..area
554 };
555 let mb_chunks = Layout::default()
556 .direction(Direction::Horizontal)
557 .constraints([label_constraint, value_constraint])
558 .split(mb_area);
559 Paragraph::new("Buffer (MB):").render(mb_chunks[0], buf);
560 if max_mb > 0 {
561 let current_mb = buf_mb.unwrap_or(0);
562 let ratio = (current_mb as f64 / max_mb as f64).min(1.0);
563 let label = match buf_mb {
564 Some(m) => format!("{:.1} / {} MiB", m as f64, max_mb),
565 None => "—".to_string(),
566 };
567 Gauge::default()
568 .gauge_style(Style::default().fg(self.primary_color))
569 .ratio(ratio)
570 .label(Span::raw(label))
571 .render(mb_chunks[1], buf);
572 } else {
573 let value = buf_mb
574 .map(|m| format!("{:.1} MiB", m as f64))
575 .unwrap_or_else(|| {
576 self.state
577 .buffered_memory_bytes()
578 .map(|b| format_bytes(b as u64))
579 .unwrap_or_else(|| "—".to_string())
580 });
581 Paragraph::new(value).render(mb_chunks[1], buf);
582 }
583 y += 1;
584
585 if let Some(ref meta) = self.ctx.parquet_metadata {
586 let (comp, uncomp) = parquet_overall_sizes(meta.as_ref());
587 if comp > 0 && uncomp > 0 && y < area.y + h {
588 let ratio = uncomp as f64 / comp as f64;
589 let value = format!("{:.1}× (uncomp. {})", ratio, format_bytes(uncomp));
590 label_value_row(
591 "Parquet comp.:",
592 &value,
593 Rect {
594 y,
595 width: w,
596 height: 1,
597 ..area
598 },
599 buf,
600 LABEL_WIDTH,
601 );
602 y += 1;
603 }
604 if y < area.y + h {
605 label_value_row(
606 "Row groups:",
607 &meta.row_groups.len().to_string(),
608 Rect {
609 y,
610 width: w,
611 height: 1,
612 ..area
613 },
614 buf,
615 LABEL_WIDTH,
616 );
617 y += 1;
618 }
619 if y < area.y + h {
620 label_value_row(
621 "Parquet version:",
622 &meta.version.to_string(),
623 Rect {
624 y,
625 width: w,
626 height: 1,
627 ..area
628 },
629 buf,
630 LABEL_WIDTH,
631 );
632 y += 1;
633 }
634 if let Some(ref cb) = meta.created_by {
635 if y < area.y + h {
636 label_value_row(
637 "Created by:",
638 cb,
639 Rect {
640 y,
641 width: w,
642 height: 1,
643 ..area
644 },
645 buf,
646 LABEL_WIDTH,
647 );
648 }
649 }
650 }
651 }
652
653 fn render_partitioned_data_tab(&self, area: Rect, buf: &mut Buffer) {
654 let y = area.y;
655 let w = area.width;
656
657 let Some(partition_columns) = self.state.partition_columns.as_ref() else {
658 Paragraph::new("No partition metadata.").render(
659 Rect {
660 y,
661 width: w,
662 height: 1,
663 ..area
664 },
665 buf,
666 );
667 return;
668 };
669
670 if partition_columns.is_empty() {
671 Paragraph::new("No partition columns.").render(
672 Rect {
673 y,
674 width: w,
675 height: 1,
676 ..area
677 },
678 buf,
679 );
680 return;
681 }
682
683 let line = format!("Partition columns: {}", partition_columns.join(", "));
684 Paragraph::new(line).render(
685 Rect {
686 y,
687 width: w,
688 height: 1,
689 ..area
690 },
691 buf,
692 );
693 }
694}
695
/// Formats an integer with `,` as the thousands separator, e.g. `1,234,567`.
fn format_int(n: usize) -> String {
    let digits = n.to_string();
    let len = digits.len();
    let mut grouped = String::with_capacity(len + len.saturating_sub(1) / 3);
    for (pos, digit) in digits.chars().enumerate() {
        // A separator goes before every digit that starts a group of three,
        // counted from the right, except before the very first digit.
        if pos > 0 && (len - pos) % 3 == 0 {
            grouped.push(',');
        }
        grouped.push(digit);
    }
    grouped
}
707
708fn columns_by_type(schema: &Schema) -> String {
709 let mut counts: HashMap<String, usize> = HashMap::new();
710 for (_, dtype) in schema.iter() {
711 let k = dtype.to_string();
712 *counts.entry(k).or_default() += 1;
713 }
714 let mut pairs: Vec<_> = counts.into_iter().collect();
715 pairs.sort_by(|a, b| a.0.cmp(&b.0));
716 pairs
717 .into_iter()
718 .map(|(k, v)| format!("{}: {}", k, v))
719 .collect::<Vec<_>>()
720 .join(" · ")
721}
722
723impl<'a> Widget for &mut DataTableInfo<'a> {
724 fn render(self, area: Rect, buf: &mut Buffer) {
725 let tab_bar_focused = self.modal.focus == InfoFocus::TabBar;
726 let block = Block::default()
727 .borders(Borders::ALL)
728 .border_type(BorderType::Rounded)
729 .title("Info");
730
731 let inner = block.inner(area);
732 block.render(area, buf);
733
734 let chunks = Layout::default()
735 .direction(Direction::Vertical)
736 .constraints([Constraint::Length(2), Constraint::Min(4)])
737 .split(inner);
738
739 let tab_chunks = Layout::default()
740 .direction(Direction::Vertical)
741 .constraints([Constraint::Length(1), Constraint::Length(1)])
742 .split(chunks[0]);
743
744 let has_partitions = self
745 .state
746 .partition_columns
747 .as_ref()
748 .map(|v| !v.is_empty())
749 .unwrap_or(false);
750 let tab_titles: Vec<&str> = if has_partitions {
751 vec!["Schema", "Resources", "Partitions"]
752 } else {
753 vec!["Schema", "Resources"]
754 };
755 let sel = self.modal.active_tab.index(has_partitions);
756 let tabs = Tabs::new(tab_titles)
757 .style(Style::default().fg(self.border_color))
758 .highlight_style(
759 Style::default()
760 .fg(self.active_color)
761 .add_modifier(Modifier::REVERSED),
762 )
763 .select(sel);
764 tabs.render(tab_chunks[0], buf);
765 let line_style = if tab_bar_focused {
766 Style::default().fg(self.active_color)
767 } else {
768 Style::default().fg(self.border_color)
769 };
770 Block::default()
771 .borders(Borders::BOTTOM)
772 .border_type(BorderType::Rounded)
773 .border_style(line_style)
774 .render(tab_chunks[1], buf);
775
776 match self.modal.active_tab {
777 InfoTab::Schema => self.render_schema_tab(chunks[1], buf),
778 InfoTab::Resources => self.render_resources_tab(chunks[1], buf),
779 InfoTab::Partitions => {
780 if has_partitions {
781 self.render_partitioned_data_tab(chunks[1], buf)
782 } else {
783 self.render_schema_tab(chunks[1], buf)
784 }
785 }
786 }
787 }
788}
789
790pub fn read_parquet_metadata(path: &Path) -> Option<ParquetMetadataCache> {
792 let mut f = std::fs::File::open(path).ok()?;
793 let meta = read_metadata(&mut f).ok()?;
794 Some(Arc::new(meta))
795}
796
#[cfg(test)]
mod tests {
    use super::*;

    /// Byte counts are shown in the largest fitting binary unit.
    #[test]
    fn test_format_bytes() {
        let cases: [(u64, &str); 4] = [
            (0, "0 B"),
            (500, "500 B"),
            (1536, "1.5 KiB"),
            (1024 * 1024, "1.0 MiB"),
        ];
        for &(input, expected) in &cases {
            assert_eq!(format_bytes(input), expected);
        }
    }

    /// Integers are grouped in thousands with commas.
    #[test]
    fn test_format_int() {
        let cases: [(usize, &str); 3] = [(0, "0"), (1234, "1,234"), (1_234_567, "1,234,567")];
        for &(input, expected) in &cases {
            assert_eq!(format_int(input), expected);
        }
    }
}