use std::{fmt, fs, path::PathBuf};
use crate::{
cli::{DictName, LangSpecs, Options},
lang::{Edition, EditionSpec, Lang},
};
#[derive(Debug, Clone, Copy)]
pub enum DictionaryType {
Main,
Glossary,
GlossaryExtended,
Ipa,
IpaMerged,
}
impl fmt::Display for DictionaryType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(match self {
Self::Main => "main",
Self::Glossary => "glossary",
Self::GlossaryExtended => "glossary-ext",
Self::Ipa => "ipa",
Self::IpaMerged => "ipa-merged",
})
}
}
#[derive(Debug, PartialEq, Eq)]
pub enum PathKind {
Filtered,
Unfiltered,
}
#[derive(Debug, PartialEq, Eq)]
struct DatasetPath {
kind: PathKind,
path: PathBuf,
}
#[derive(Debug, PartialEq, Eq)]
pub struct DatasetPaths {
inner: Vec<DatasetPath>,
}
impl DatasetPaths {
pub fn of_kind(&self, kinds: &[PathKind]) -> Vec<PathBuf> {
self.inner
.iter()
.filter_map(|p| {
if kinds.contains(&p.kind) {
Some(p.path.clone())
} else {
None
}
})
.collect()
}
}
fn dataset_raw_unfiltered(edition: Edition, root: PathBuf) -> DatasetPath {
DatasetPath {
kind: PathKind::Unfiltered,
path: root.join(format!("{edition}-extract.jsonl")),
}
}
fn dataset_raw_filtered(edition: Edition, lang: Lang, root: PathBuf) -> DatasetPath {
DatasetPath {
kind: PathKind::Filtered,
path: root.join(format!("{lang}-{edition}-extract.jsonl")),
}
}
impl DatasetPaths {
pub fn new(edition: Edition, lang: Option<Lang>, root: PathBuf) -> Self {
Self {
inner: match lang {
Some(lang) => vec![
dataset_raw_filtered(edition, lang, root.clone()),
dataset_raw_unfiltered(edition, root),
],
None => vec![dataset_raw_unfiltered(edition, root)],
},
}
}
}
#[derive(Debug, Clone)]
pub struct PathManager {
pub dict_ty: DictionaryType,
pub dict_name: DictName,
pub langs: LangSpecs,
pub opts: Options,
}
impl PathManager {
pub const fn set_edition(&mut self, edition: EditionSpec) {
self.langs.edition = edition;
}
pub const fn set_source(&mut self, source: Lang) {
self.langs.source = source;
}
pub const fn set_target(&mut self, target: Lang) {
self.langs.target = target;
}
pub const fn langs(&self) -> (EditionSpec, Lang, Lang) {
let LangSpecs {
edition,
source,
target,
} = self.langs;
(edition, source, target)
}
const fn root_dir(&self) -> &PathBuf {
&self.opts.root_dir
}
pub fn dir_kaik(&self) -> PathBuf {
self.root_dir().join("kaikki")
}
pub fn dir_dicts(&self) -> PathBuf {
self.root_dir().join("dict")
}
fn dir_dict(&self) -> PathBuf {
self.dir_dicts().join(match self.dict_ty {
DictionaryType::IpaMerged => format!("{}/{}", self.langs.edition, self.langs.target),
_ => format!("{}/{}", self.langs.source, self.langs.target),
})
}
fn dir_temp(&self) -> PathBuf {
self.dir_dict().join(format!("temp-{}", self.dict_ty))
}
pub fn dir_tidy(&self) -> PathBuf {
self.dir_temp().join("tidy")
}
pub fn setup_dirs(&self) -> anyhow::Result<()> {
fs::create_dir_all(self.dir_kaik())?;
fs::create_dir_all(self.dir_dict())?;
if self.opts.save_temps {
fs::create_dir_all(self.dir_tidy())?;
fs::create_dir_all(self.dir_temp_dict())?;
}
Ok(())
}
pub fn dataset_paths(&self, edition: Edition, lang: Option<Lang>) -> DatasetPaths {
DatasetPaths::new(edition, lang, self.dir_kaik())
}
pub fn path_lemmas(&self) -> PathBuf {
self.dir_tidy().join(format!(
"{}-{}-lemmas.json",
self.langs.source, self.langs.target
))
}
pub fn path_forms(&self) -> PathBuf {
self.dir_tidy().join(format!(
"{}-{}-forms.json",
self.langs.source, self.langs.target
))
}
pub fn dir_temp_dict(&self) -> PathBuf {
self.dir_temp().join("dict")
}
pub fn dict_name_expanded(&self) -> String {
let dict_name = &self.dict_name;
let LangSpecs {
edition,
source,
target,
} = self.langs;
use DictionaryType::*;
let mut expanded = match self.dict_ty {
Main => format!("{dict_name}-{source}-{target}"),
Glossary => format!("{dict_name}-{source}-{target}-gloss"),
GlossaryExtended => format!("{dict_name}-{edition}-{source}-{target}-gloss"),
Ipa => format!("{dict_name}-{source}-{target}-ipa"),
IpaMerged => format!("{dict_name}-{target}-ipa"),
};
if self.opts.experimental {
expanded.push_str("-exp");
}
expanded
}
pub fn path_dict(&self) -> PathBuf {
self.dir_dict()
.join(format!("{}.zip", self.dict_name_expanded()))
}
pub fn dir_diagnostics(&self) -> PathBuf {
self.dir_temp().join("diagnostics")
}
}