use std::path::PathBuf;
use anyhow::Result;
use crate::{
lang::{Edition, Lang},
path::{PathKind, PathManager},
utils::skip_because_file_exists,
};
/// Builds the kaikki.org URL of the gzipped raw wiktextract dump for `edition`.
///
/// The English edition lives under `/dictionary/`; every other edition lives under
/// `/<edition>wiktionary/`, where `<edition>` is the `Display` rendering of the edition.
///
/// The current implementation always returns `Ok`.
fn url_jsonl_gz(edition: Edition) -> Result<String> {
    let host = "https://kaikki.org";
    let file_name = "raw-wiktextract-data.jsonl.gz";

    // Only the directory segment differs between editions.
    let directory = match edition {
        Edition::En => String::from("dictionary"),
        other => format!("{other}wiktionary"),
    };

    Ok(format!("{host}/{directory}/{file_name}"))
}
pub fn find_or_download_jsonl(
edition: Edition,
lang: Option<Lang>,
pm: &PathManager,
) -> Result<PathBuf> {
let paths_candidates = pm.dataset_paths(edition, lang);
let kinds_to_check = [PathKind::Unfiltered, PathKind::Filtered];
let of_kind = paths_candidates.of_kind(&kinds_to_check);
if !pm.opts.redownload
&& let Some(existing) = of_kind.iter().find(|p| p.exists())
{
if !pm.opts.quiet {
skip_because_file_exists("download", &existing);
}
return Ok(existing.clone());
}
let path = &of_kind.into_iter().next_back().unwrap_or_else(|| {
panic!(
"No path available, \
for edition={edition:?} and lang={lang:?} | {paths_candidates:?}"
)
});
#[cfg(feature = "html")]
crate::download::download_jsonl(edition, path, false)?;
Ok(path.clone())
}
#[cfg(feature = "html")]
pub use html::*;
#[cfg(feature = "html")]
mod html {
    use super::url_jsonl_gz;
    use anyhow::Result;
    use flate2::read::GzDecoder;
    use std::fs::File;
    use std::io::{BufWriter, Write};
    use std::path::{Path, PathBuf};

    use crate::{
        lang::Edition,
        utils::{CHECK_C, pretty_println_at_path},
    };

    /// Suffix appended to the destination path while the download is still in progress.
    const PARTIAL_SUFFIX: &str = ".part";

    /// Downloads the gzipped dataset for `edition` and writes it, decompressed, to `path_jsonl`.
    ///
    /// The response body is streamed through a gzip decoder straight to disk. The data is first
    /// written to a sibling file (`path_jsonl` + `.part`) and only renamed to `path_jsonl` once
    /// it has been completely written and flushed, so an interrupted download never leaves a
    /// truncated file at `path_jsonl`.
    ///
    /// When `quiet` is `false`, a line is printed before the download starts and another one
    /// after it finishes. If the response carries a `last-modified` header, it is logged via
    /// `tracing::info!`.
    ///
    /// # Errors
    ///
    /// Returns an error if the request fails, or if decompressing, writing, flushing or
    /// renaming the output file fails. On a write/rename failure the partial file is removed
    /// on a best-effort basis.
    pub fn download_jsonl(edition: Edition, path_jsonl: &Path, quiet: bool) -> Result<()> {
        let url = url_jsonl_gz(edition)?;
        if !quiet {
            println!("⬇ Downloading {url}");
        }

        let response = ureq::get(url).call()?;
        if let Some(last_modified) = response.headers().get("last-modified") {
            tracing::info!("Download was last modified: {:?}", last_modified);
        }

        let reader = response.into_body().into_reader();
        let mut decoder = GzDecoder::new(reader);

        let path_partial = partial_path_for(path_jsonl);
        let written = write_decompressed(&mut decoder, &path_partial)
            .and_then(|()| std::fs::rename(&path_partial, path_jsonl));
        if let Err(err) = written {
            // Best-effort cleanup; the original error is the one worth reporting.
            let _ = std::fs::remove_file(&path_partial);
            return Err(err.into());
        }

        if !quiet {
            pretty_println_at_path(&format!("{CHECK_C} Downloaded"), path_jsonl);
        }
        Ok(())
    }

    /// Returns `path` with [`PARTIAL_SUFFIX`] appended to its full name.
    fn partial_path_for(path: &Path) -> PathBuf {
        let mut raw = path.as_os_str().to_owned();
        raw.push(PARTIAL_SUFFIX);
        PathBuf::from(raw)
    }

    /// Copies everything from `source` into a freshly created file at `dest`.
    fn write_decompressed(source: &mut impl std::io::Read, dest: &Path) -> std::io::Result<()> {
        let mut writer = BufWriter::new(File::create(dest)?);
        std::io::copy(source, &mut writer)?;
        // Flush explicitly: dropping a `BufWriter` ignores errors from the final write.
        writer.flush()
    }
}