#![allow(clippy::needless_doctest_main)]
#![cfg_attr(
not(test),
deny(
// This is a tool, and as such we don't care about panics too much
// clippy::indexing_slicing,
// clippy::unwrap_used,
// clippy::expect_used,
// clippy::panic,
clippy::exhaustive_structs,
clippy::exhaustive_enums,
// TODO(#2266): enable missing_debug_implementations,
)
)]
#![warn(missing_docs)]
mod databake;
mod error;
mod registry;
mod source;
#[cfg(test)]
mod testutil;
mod transform;
pub use error::{is_missing_cldr_error, is_missing_icuexport_error};
pub use registry::{all_keys, all_keys_with_experimental};
pub use source::{CldrLocaleSubset, CollationHanDatabase, SourceData};
pub mod syntax {
pub use icu_provider_fs::export::serializers::bincode::Serializer as Bincode;
pub use icu_provider_fs::export::serializers::json::Serializer as Json;
pub use icu_provider_fs::export::serializers::postcard::Serializer as Postcard;
}
pub mod prelude {
pub use super::{syntax, CldrLocaleSubset, CollationHanDatabase, Out, SourceData};
pub use icu_locid::{langid, LanguageIdentifier};
pub use icu_provider::KeyedDataMarker;
}
use icu_locid::LanguageIdentifier;
use icu_provider::datagen::*;
use icu_provider::prelude::*;
use icu_provider_adapters::empty::EmptyDataProvider;
use icu_provider_adapters::filter::Filterable;
use icu_provider_fs::export::serializers::AbstractSerializer;
use rayon::prelude::*;
use std::collections::HashSet;
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
/// A provider that carries the [`SourceData`] it reads from.
///
/// [`datagen`] constructs one of these (from a clone of its `source` argument)
/// whenever the requested locale list is not the empty slice.
#[derive(Clone, Debug)]
// NOTE(review): exhaustiveness is deliberately allowed for this struct; presumably
// any future configuration is expected to live inside `SourceData` — confirm.
#[allow(clippy::exhaustive_structs)]
pub struct DatagenProvider {
    /// The source data held by this provider.
    pub source: SourceData,
}
#[cfg(test)]
impl DatagenProvider {
    /// Returns a clone of a shared, lazily-initialised provider whose source is
    /// `SourceData::for_test()`.
    pub fn for_test() -> Self {
        lazy_static::lazy_static! {
            static ref SHARED: DatagenProvider = {
                let source = SourceData::for_test();
                DatagenProvider { source }
            };
        }
        // Deref the lazy wrapper to the provider itself before cloning it.
        let shared: &DatagenProvider = &SHARED;
        shared.clone()
    }
}
impl AnyProvider for DatagenProvider {
    /// Loads `key` for `req` by delegating to the wrapper returned by
    /// `as_any_provider()`.
    fn load_any(&self, key: DataKey, req: DataRequest) -> Result<AnyResponse, DataError> {
        let any_view = self.as_any_provider();
        any_view.load_any(key, req)
    }
}
pub fn key<S: AsRef<str>>(string: S) -> Option<DataKey> {
lazy_static::lazy_static! {
static ref LOOKUP: std::collections::HashMap<&'static str, DataKey> = all_keys_with_experimental()
.into_iter()
.chain(std::iter::once(
icu_provider::hello_world::HelloWorldV1Marker::KEY,
))
.map(|k| (k.path().get(), k))
.collect();
}
LOOKUP.get(string.as_ref()).copied()
}
pub fn keys<S: AsRef<str>>(strings: &[S]) -> Vec<DataKey> {
strings.iter().filter_map(crate::key).collect()
}
pub fn keys_from_file<P: AsRef<Path>>(path: P) -> std::io::Result<Vec<DataKey>> {
BufReader::new(std::fs::File::open(path.as_ref())?)
.lines()
.filter_map(|k| k.map(crate::key).transpose())
.collect()
}
/// Collects the data keys embedded in the binary file at `path`.
///
/// The file's bytes are scanned for spans that start right after
/// `icu_provider::leading_tag!()` and end right before
/// `icu_provider::trailing_tag!()`. Every span that is valid UTF-8 and resolves
/// through [`key`] contributes one key; all other spans are ignored.
///
/// The returned keys are sorted and free of duplicates.
///
/// # Errors
///
/// Returns the I/O error if the file cannot be read.
pub fn keys_from_bin<P: AsRef<Path>>(path: P) -> std::io::Result<Vec<DataKey>> {
    let leading = icu_provider::leading_tag!().as_bytes();
    let trailing = icu_provider::trailing_tag!().as_bytes();

    let file = std::fs::read(path.as_ref())?;
    let mut result = Vec::new();
    let mut i = 0;
    // Offset of the first byte after the most recent leading tag, if a span is open.
    let mut last_start = None;
    while i < file.len() {
        let rest = &file[i..];
        if rest.starts_with(leading) {
            // A leading tag always (re)opens a span, even if one was already open.
            i += leading.len();
            last_start = Some(i);
            continue;
        }
        match last_start {
            // A trailing tag only counts when a span is currently open.
            Some(start) if rest.starts_with(trailing) => {
                if let Some(key) = std::str::from_utf8(&file[start..i])
                    .ok()
                    .and_then(crate::key)
                {
                    result.push(key);
                }
                i += trailing.len();
                last_start = None;
            }
            _ => i += 1,
        }
    }
    result.sort();
    result.dedup();
    Ok(result)
}
/// The output destinations that [`datagen`] can export to.
///
/// Each value is turned into one exporter by [`datagen`].
#[non_exhaustive]
pub enum Out {
/// Export through `icu_provider_fs::export::FilesystemExporter`.
Fs {
/// Used as the `root` of the exporter options.
output_path: PathBuf,
/// The serializer handed to the filesystem exporter.
serializer: Box<dyn AbstractSerializer + Sync>,
/// When `true`, the exporter options' `overwrite` is set to
/// `OverwriteOption::RemoveAndReplace`; otherwise the default is kept.
overwrite: bool,
/// Copied into the exporter options' `fingerprint` field.
fingerprint: bool,
},
/// Export through `icu_provider_blob::export::BlobExporter`, using the boxed
/// writer as its sink.
Blob(Box<dyn std::io::Write + Sync>),
/// Export through this crate's `databake::BakedDataExporter`; the fields are
/// passed to `BakedDataExporter::new` in declaration order.
Module {
/// NOTE(review): presumably the directory the generated module is written to — confirm.
mod_directory: PathBuf,
/// NOTE(review): presumably controls formatting of the generated code — confirm.
pretty: bool,
/// NOTE(review): presumably controls whether feature gates are emitted in the
/// generated code — confirm.
insert_feature_gates: bool,
/// NOTE(review): presumably selects which crate paths the generated code refers
/// to — confirm.
use_separate_crates: bool,
},
}
/// Exports the data for `keys` to every output in `outs`.
///
/// # Arguments
///
/// * `locales` - `None` applies no locale filter. `Some(&[])` exports from an
///   `EmptyDataProvider`. Any other list keeps only language identifiers whose
///   language is empty or which are contained in the list.
/// * `keys` - the keys to export; duplicates are processed only once.
/// * `source` - cloned into the [`DatagenProvider`] that data is loaded from.
/// * `outs` - each entry is turned into one exporter.
///
/// For every key, payloads for all supported locales are loaded and handed to
/// all exporters, after which each exporter is flushed for that key. Once all
/// keys are done, every exporter is closed.
///
/// # Errors
///
/// Returns the `DataError` produced while constructing an exporter, querying
/// supported locales, loading or writing a payload, flushing, or closing.
pub fn datagen(
    locales: Option<&[LanguageIdentifier]>,
    keys: &[DataKey],
    source: &SourceData,
    outs: Vec<Out>,
) -> Result<(), DataError> {
    // Build one exporter per requested output, bailing out on the first failure.
    let mut exporters: Vec<Box<dyn DataExporter>> = Vec::with_capacity(outs.len());
    for out in outs {
        let exporter: Box<dyn DataExporter> = match out {
            Out::Fs {
                output_path,
                serializer,
                overwrite,
                fingerprint,
            } => {
                let mut fs_options = icu_provider_fs::export::ExporterOptions::default();
                fs_options.root = output_path;
                fs_options.fingerprint = fingerprint;
                if overwrite {
                    fs_options.overwrite =
                        icu_provider_fs::export::OverwriteOption::RemoveAndReplace;
                }
                Box::new(icu_provider_fs::export::FilesystemExporter::try_new(
                    serializer, fs_options,
                )?)
            }
            Out::Blob(sink) => {
                Box::new(icu_provider_blob::export::BlobExporter::new_with_sink(sink))
            }
            Out::Module {
                mod_directory,
                pretty,
                insert_feature_gates,
                use_separate_crates,
            } => Box::new(databake::BakedDataExporter::new(
                mod_directory,
                pretty,
                insert_feature_gates,
                use_separate_crates,
            )?),
        };
        exporters.push(exporter);
    }

    // Only clone the source data when a real provider is actually needed.
    let unfiltered = || DatagenProvider {
        source: source.clone(),
    };
    let provider: Box<dyn ExportableProvider> = match locales {
        None => Box::new(unfiltered()),
        Some(&[]) => Box::new(EmptyDataProvider::default()),
        Some(allowed) => Box::new(
            unfiltered()
                .filterable("icu4x-datagen locales")
                .filter_by_langid(move |langid| {
                    langid.language.is_empty() || allowed.contains(langid)
                }),
        ),
    };

    // Deduplicate so that every key is exported exactly once.
    let unique_keys: HashSet<&DataKey> = keys.iter().collect();
    unique_keys.into_par_iter().try_for_each(|&key| {
        let supported = provider
            .supported_locales_for_key(key)
            .map_err(|err| err.with_key(key))?;
        let exported = supported.into_par_iter().try_for_each(|locale| {
            let req = DataRequest {
                locale: &locale,
                metadata: Default::default(),
            };
            let payload = provider
                .load_data(key, req)
                .and_then(DataResponse::take_payload)
                .map_err(|err| err.with_req(key, req))?;
            exporters.par_iter().try_for_each(|exporter| {
                exporter
                    .put_payload(key, &locale, &payload)
                    .map_err(|err| err.with_req(key, req))
            })
        });

        // Exporters are flushed for this key even if exporting a locale failed;
        // the export result is only surfaced afterwards.
        log::info!("Writing key: {}", key);
        for exporter in &exporters {
            exporter.flush(key).map_err(|err| err.with_key(key))?;
        }

        exported
    })?;

    for mut exporter in exporters {
        exporter.close()?;
    }
    Ok(())
}
#[test]
fn test_keys() {
    // "trash" is not a known key path and must be dropped from the result.
    let requested = [
        "list/and@1",
        "datetime/gregory/datelengths@1",
        "decimal/symbols@1",
        "trash",
    ];
    let expected = vec![
        icu_list::provider::AndListV1Marker::KEY,
        icu_datetime::provider::calendar::GregorianDateLengthsV1Marker::KEY,
        icu_decimal::provider::DecimalSymbolsV1Marker::KEY,
    ];
    assert_eq!(keys(&requested), expected);
}
#[test]
fn test_keys_from_file() {
    let fixture = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/data/work_log+keys.txt");
    let actual = keys_from_file(fixture).unwrap();
    let expected = vec![
        icu_datetime::provider::calendar::GregorianDateLengthsV1Marker::KEY,
        icu_datetime::provider::calendar::GregorianDateSymbolsV1Marker::KEY,
        icu_datetime::provider::calendar::TimeSymbolsV1Marker::KEY,
        icu_calendar::provider::WeekDataV1Marker::KEY,
        icu_decimal::provider::DecimalSymbolsV1Marker::KEY,
        icu_plurals::provider::OrdinalV1Marker::KEY,
    ];
    assert_eq!(actual, expected);
}
#[test]
fn test_keys_from_bin() {
    let fixture = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/data/work_log.wasm");
    let actual = keys_from_bin(fixture).unwrap();
    let expected = vec![
        icu_datetime::provider::calendar::GregorianDateLengthsV1Marker::KEY,
        icu_datetime::provider::calendar::GregorianDateSymbolsV1Marker::KEY,
        icu_datetime::provider::calendar::TimeLengthsV1Marker::KEY,
        icu_datetime::provider::calendar::TimeSymbolsV1Marker::KEY,
        icu_calendar::provider::WeekDataV1Marker::KEY,
        icu_decimal::provider::DecimalSymbolsV1Marker::KEY,
        icu_plurals::provider::OrdinalV1Marker::KEY,
    ];
    assert_eq!(actual, expected);
}