#![forbid(unsafe_code)]
#![doc(
html_logo_url = "https://raw.githubusercontent.com/ArdentEmpiricist/text_analysis/main/assets/text_analysis_logo.png"
)]
use clap::{Parser, ValueEnum};
use std::path::PathBuf;
use text_analysis::{AnalysisOptions, ExportFormat, StemLang, StemMode, analyze_path};
// Command-line arguments of the binary, parsed through clap's derive API.
//
// NOTE: plain `//` comments are used on purpose. clap's derive turns `///`
// doc comments on the struct and its fields into `--help` text, so adding
// them here would change the program's user-visible output.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Cli {
// Positional argument: the path handed to `analyze_path`.
path: PathBuf,
// `--stopwords <PATH>`: optional path, forwarded to `analyze_path` as-is
// (presumably a stopword list file; confirm against the library docs).
#[arg(long)]
stopwords: Option<PathBuf>,
// `--ngram <N>`: forwarded as `AnalysisOptions::ngram`; defaults to 2.
#[arg(long, default_value_t = 2)]
ngram: usize,
// `--context <N>`: forwarded as `AnalysisOptions::context`; defaults to 5.
#[arg(long, default_value_t = 5)]
context: usize,
// `--export-format <FORMAT>`: one of the `CliExportFormat` values; defaults to `Txt`.
#[arg(long, value_enum, default_value_t = CliExportFormat::Txt)]
export_format: CliExportFormat,
// `--entities-only`: boolean flag, forwarded as `AnalysisOptions::entities_only`.
#[arg(long, default_value_t = false)]
entities_only: bool,
// `--combine`: boolean flag, forwarded as `AnalysisOptions::combine`.
#[arg(long, default_value_t = false)]
combine: bool,
// `--stem`: when set without `--stem-lang`, `main` selects `StemMode::Auto`.
#[arg(long, default_value_t = false)]
stem: bool,
// `--stem-lang <CODE>`: when present, `main` selects `StemMode::Force` for the
// language resolved by `StemLang::from_code`, regardless of `--stem`.
#[arg(long)]
stem_lang: Option<String>,
// `--stem-strict`: forwarded as `AnalysisOptions::stem_require_detected`.
#[arg(long, default_value_t = false)]
stem_strict: bool,
}
// Export formats selectable via `--export-format` (exposed to clap through `ValueEnum`).
//
// This is a CLI-local mirror of the library's `ExportFormat`; the `From` impl in
// this file converts each variant to the library variant of the same name.
// Plain `//` comments are used because clap's derive turns `///` doc comments
// on variants into help text for the possible values.
#[derive(Copy, Clone, Eq, PartialEq, Debug, ValueEnum)]
enum CliExportFormat {
Txt,
Csv,
Tsv,
Json,
}
/// Converts the CLI-facing format selection into the library's [`ExportFormat`].
impl From<CliExportFormat> for ExportFormat {
    /// Maps every CLI variant onto the library variant with the same name.
    ///
    /// The match is exhaustive on purpose: adding a variant to
    /// `CliExportFormat` fails to compile until it is mapped here.
    fn from(cli_format: CliExportFormat) -> Self {
        match cli_format {
            CliExportFormat::Txt => Self::Txt,
            CliExportFormat::Csv => Self::Csv,
            CliExportFormat::Tsv => Self::Tsv,
            CliExportFormat::Json => Self::Json,
        }
    }
}
fn main() {
let cli = Cli::parse();
let stem_mode = match (cli.stem, cli.stem_lang.as_deref()) {
(_, Some(code)) => StemMode::Force(StemLang::from_code(code).unwrap_or(StemLang::Unknown)),
(true, None) => StemMode::Auto,
_ => StemMode::Off,
};
let options = AnalysisOptions {
ngram: cli.ngram,
context: cli.context,
export_format: cli.export_format.into(),
entities_only: cli.entities_only,
combine: cli.combine,
stem_mode,
stem_require_detected: cli.stem_strict,
};
match analyze_path(&cli.path, cli.stopwords.as_ref(), &options) {
Ok(report) => {
println!("{}", report.summary);
if !report.failed_files.is_empty() {
eprintln!("Warnings ({} files):", report.failed_files.len());
for (file, err) in report.failed_files {
eprintln!(" {} -> {}", file, err);
}
}
}
Err(e) => {
eprintln!("Error: {}", e);
std::process::exit(1);
}
}
}