use crate::commands;
use crate::source::KartSourceEnum;
use clap::Args;
use num_format::{Locale, ToFormattedString};
use rayon::prelude::*;
use serde::Serialize;
use std::path::Path;
// Command-line arguments for the dataset listing command.
//
// NOTE(review): kept as a plain comment rather than `///` on purpose. clap's
// derive can surface struct-level doc comments as command help text, and the
// command description is presumably owned by whoever registers this struct —
// confirm before converting. The field-level `///` comments below ARE meant
// to be rendered by clap as `--help` text for each argument.
#[derive(Args, Debug, Clone)]
pub struct ListArgs {
    /// Path of the source to list datasets from
    #[arg(default_value = ".")]
    pub path: String,

    /// Git revision to use when looking up datasets
    #[arg(long)]
    pub git_rev: Option<String>,

    /// Also print the schema of each dataset
    #[arg(long)]
    pub schema: bool,

    /// Also report the feature count and total size of each dataset
    #[arg(long)]
    pub stats: bool,

    /// Print one JSON object per dataset instead of plain text
    #[arg(long)]
    pub json: bool,
}
/// Aggregate statistics for a single dataset, as produced by `calculate_stats`.
#[derive(Serialize)]
struct DatasetStats {
/// Dataset name, stored exactly as it was handed to `calculate_stats`.
name: String,
/// Number of feature identifiers reported by the dataset source.
count: usize,
/// Sum of the per-feature sizes reported by the dataset source
/// (presumably bytes, since `run` formats it with `humansize` — confirm).
size: u64,
}
/// JSON record printed by `run` for one dataset when `--json` is set.
///
/// Every optional field is left out of the serialized object when it is
/// `None`, so the record only contains what was actually requested.
#[derive(Serialize)]
struct DatasetOutput {
/// Short dataset name (final path component of the full dataset name).
name: String,
/// Feature count; only populated when stats were requested.
#[serde(skip_serializing_if = "Option::is_none")]
count: Option<usize>,
/// Total feature size; only populated when stats were requested.
#[serde(skip_serializing_if = "Option::is_none")]
size: Option<u64>,
/// Dataset schema as a JSON value; only populated when the schema was requested.
#[serde(skip_serializing_if = "Option::is_none")]
schema: Option<serde_json::Value>,
}
/// Lists the datasets found at `args.path`, printing one entry per dataset.
///
/// For every dataset returned by `commands::find_datasets`:
///
/// * with `args.stats`, the feature count and total size are computed;
/// * with `args.schema`, the dataset schema is converted to a JSON value;
/// * with `args.json`, a single-line JSON object is printed; otherwise a
///   plain-text line is printed, followed by the pretty-printed schema when
///   one was requested.
///
/// # Errors
///
/// Propagates any failure from dataset discovery, stats calculation, schema
/// retrieval, or JSON serialization.
pub fn run(args: ListArgs) -> Result<(), Box<dyn std::error::Error>> {
    let (is_git, final_path) = commands::resolve_source_path(&args.path);
    let datasets = commands::find_datasets(&final_path, is_git, args.git_rev.as_deref())?;

    for (name, source) in datasets {
        // Final path component of the dataset name; empty when there is none.
        let short_name = Path::new(&name)
            .file_name()
            .unwrap_or_default()
            .to_string_lossy()
            .into_owned();

        // Stats are only computed on request; both values are set together.
        let (count, size) = if args.stats {
            let stats = calculate_stats(short_name.clone(), &source)?;
            (Some(stats.count), Some(stats.size))
        } else {
            (None, None)
        };

        let schema = if args.schema {
            let raw_schema = source
                .get_schema()
                .map_err(|e| e as Box<dyn std::error::Error>)?;
            Some(serde_json::to_value(raw_schema)?)
        } else {
            None
        };

        if args.json {
            let record = DatasetOutput {
                name: short_name,
                count,
                size,
                schema,
            };
            println!("{}", serde_json::to_string(&record)?);
            continue;
        }

        match (count, size) {
            (Some(c), Some(s)) => println!(
                "{: <50} {: >10} {: >15}",
                short_name,
                c.to_formatted_string(&Locale::en),
                humansize::format_size(s, humansize::DECIMAL)
            ),
            // NOTE(review): this path prints the full `name`, whereas the stats
            // and JSON paths print the short name — confirm this is intended.
            _ => println!("{}", name),
        }

        if let Some(schema_value) = schema {
            println!("{}", serde_json::to_string_pretty(&schema_value)?);
        }
    }

    Ok(())
}
/// Computes the feature count and the summed feature size of one dataset.
///
/// `name` is stored unchanged in the returned `DatasetStats`. The identifiers
/// yielded by `source.feature_identifiers()` are collected first so they can
/// be both counted and walked with a parallel iterator; a size lookup that
/// yields no value contributes 0 to the total.
///
/// # Errors
///
/// The current body never returns `Err`; the `Result` return type is part of
/// the signature callers use with `?`.
fn calculate_stats(
    name: String,
    source: &KartSourceEnum,
) -> Result<DatasetStats, Box<dyn std::error::Error>> {
    let ids = source.feature_identifiers().collect::<Vec<_>>();

    let total_size = ids
        .par_iter()
        .map(|feature_id| source.get_feature_size(feature_id).unwrap_or(0))
        .sum::<u64>();

    Ok(DatasetStats {
        name,
        count: ids.len(),
        size: total_size,
    })
}