use crate::SourceDataProvider;
use icu::locale::{langid, LanguageIdentifier};
use icu_provider::dynutil::UpcastDataPayload;
use icu_provider::export::*;
use icu_provider::prelude::*;
use icu_provider_export::prelude::*;
use std::alloc::{GlobalAlloc, Layout, System};
use std::cell::Cell;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::sync::Mutex;
include!("../../tests/locales.rs.data");
/// Runs the export driver over the testing source data and feeds every payload to a
/// [`ZeroCopyCheckExporter`].
///
/// If `ICU4X_WRITE_TESTDATA` was set when this file was compiled (`option_env!`), logging
/// is initialised and the payloads are additionally written as pretty-printed JSON under
/// `data/debug`. The marker-attribute filters below restrict which attributes are exported
/// for the listed filter domains.
///
/// Panics (failing the test) if any setup step, the export itself, or the assertions in
/// `ZeroCopyCheckExporter::close` fail.
#[test]
#[cfg(feature = "use_wasm")]
fn make_testdata() {
    // Allowlists consulted by the marker-attribute filters registered on the driver.
    const UNITS: &[&str] = &[
        "meter", "foot", "kilogram", "pound", "hour", "minute", "second",
    ];
    const CURRENCIES: &[&str] = &["CAD", "EGP", "EUR", "GBP", "USD"];
    const REGIONS: &[&str] = &["419", "FR", "CG", "MM", "HK"];
    const LANGUAGES: &[&str] = &["fr", "zh", "en-GB", "zh-Hant", "de-CH", "ku"];
    const SCRIPTS: &[&str] = &["Latn", "Hans", "Hant", "Cans"];
    const VARIANTS: &[&str] = &["POSIX"];
    const NUMBERING_SYSTEMS: &[&str] = &["arab", "beng", "cakm", "latn", "thai"];
    const TRANSLITERATORS: &[&str] = &[
        "de-t-de-d0-ascii",
        "el-latn-t-s0-ascii",
        "el-latn-t-el-m0-bgn",
        "und-arab-t-und-beng",
        "und-latn-t-s0-ascii",
        "und-t-d0-publish",
        "und-t-s0-publish",
        "und-t-und-latn-d0-ascii",
        "und-x-bengali-interind",
        "und-x-interind-arabic",
    ];

    // Both exporter configurations need a freshly initialised checker.
    let new_checker = || ZeroCopyCheckExporter {
        zero_copy_violations: Default::default(),
        zero_copy_transient_violations: Default::default(),
        rountrip_errors: Default::default(),
    };

    let exporter: Box<dyn DataExporter> = match std::option_env!("ICU4X_WRITE_TESTDATA") {
        // Check-only mode: nothing is written to disk.
        None => Box::new(new_checker()),
        // Write mode: dump JSON next to running the checks.
        Some(_) => {
            use icu_provider_export::fs_exporter;

            simple_logger::SimpleLogger::new()
                .env()
                .with_level(log::LevelFilter::Info)
                .init()
                .unwrap();

            let mut fs_options = fs_exporter::Options::default();
            fs_options.root = "data/debug".into();
            fs_options.overwrite = fs_exporter::OverwriteOption::RemoveAndReplace;

            let json_exporter = fs_exporter::FilesystemExporter::try_new(
                Box::new(fs_exporter::serializers::Json::pretty()),
                fs_options,
            )
            .unwrap();

            Box::new(MultiExporter::new(vec![
                Box::new(json_exporter),
                Box::new(new_checker()),
            ]))
        }
    };

    let provider = SourceDataProvider::new_testing();
    // Every locale in `LOCALES` is exported as a single-locale family.
    let families = LOCALES
        .iter()
        .cloned()
        .map(Into::into)
        .map(DataLocaleFamily::single);
    let fallbacker = LocaleFallbacker::try_new_unstable(&provider).unwrap();

    ExportDriver::new(
        families,
        DeduplicationStrategy::RetainBaseLanguages.into(),
        fallbacker,
    )
    .with_segmenter_models([
        "thaidict".into(),
        "Thai_codepoints_exclusive_model4_heavy".into(),
    ])
    .with_marker_attributes_filter("units", |attrs| {
        // Only the part after the first `-` is filtered on; an attribute without a
        // `-` panics here, exactly as before.
        let unit = attrs.as_str().split_once('-').unwrap().1;
        UNITS.contains(&unit)
    })
    .with_marker_attributes_filter("currency", |attrs| CURRENCIES.contains(&attrs.as_str()))
    .with_marker_attributes_filter("locale_names_region", |attrs| {
        REGIONS.contains(&attrs.as_str())
    })
    .with_marker_attributes_filter("locale_names_language", |attrs| {
        LANGUAGES.contains(&attrs.as_str())
    })
    .with_marker_attributes_filter("locale_names_script", |attrs| {
        SCRIPTS.contains(&attrs.as_str())
    })
    .with_marker_attributes_filter("locale_names_variant", |attrs| {
        VARIANTS.contains(&attrs.as_str())
    })
    .with_marker_attributes_filter("numbering_system", |attrs| {
        NUMBERING_SYSTEMS.contains(&attrs.as_str())
    })
    .with_marker_attributes_filter("transliterator", |attrs| {
        TRANSLITERATORS.contains(&attrs.as_str())
    })
    .export(&provider, exporter)
    .unwrap();
}
/// A [`DataExporter`] that round-trips every payload it receives through postcard and
/// records what went wrong, so that `close` can assert on the collected results.
struct ZeroCopyCheckExporter {
    /// Markers for which deserialization allocated a different number of bytes than it
    /// freed.
    zero_copy_violations: Mutex<BTreeSet<DataMarkerInfo>>,
    /// Markers for which deserialization allocated memory and freed the same number of
    /// bytes again.
    zero_copy_transient_violations: Mutex<BTreeSet<DataMarkerInfo>>,
    /// For each marker, the identifiers (locale plus marker attributes, as a string) whose
    /// deserialized payload was not equal to the payload that had been serialized.
    rountrip_errors: Mutex<BTreeMap<DataMarkerInfo, BTreeSet<String>>>,
}
/// Markers that are expected to be recorded in
/// `ZeroCopyCheckExporter::zero_copy_violations` (allocated bytes differ from freed bytes
/// during deserialization). `close` asserts that the markers actually found match this list.
const EXPECTED_VIOLATIONS: &[DataMarkerInfo] = &[];
/// Markers that are expected to be recorded in
/// `ZeroCopyCheckExporter::zero_copy_transient_violations` (memory is allocated during
/// deserialization, but the same number of bytes is freed again). `close` asserts that the
/// markers actually found match this list.
const EXPECTED_TRANSIENT_VIOLATIONS: &[DataMarkerInfo] = &[
    icu::list::provider::ListOrV1::INFO,
    icu::list::provider::ListAndV1::INFO,
    icu::list::provider::ListUnitV1::INFO,
];
impl DataExporter for ZeroCopyCheckExporter {
fn put_payload(
&self,
marker: DataMarkerInfo,
id: DataIdentifierBorrowed,
payload_before: &DataPayload<ExportMarker>,
) -> Result<(), DataError> {
use postcard::{
ser_flavors::{AllocVec, Flavor},
Serializer,
};
let mut serializer = Serializer {
output: AllocVec::new(),
};
payload_before.serialize(&mut serializer).unwrap();
let serialized = serializer.output.finalize().unwrap();
let buffer_payload = DataPayload::from_owned_buffer(serialized.into_boxed_slice());
MeasuringAllocator::start_measure();
let allocated;
let deallocated;
let payload_after;
macro_rules! cb {
($($marker_ty:ty:$marker:ident,)+ #[unstable] $($emarker_ty:ty:$emarker:ident,)+) => {
((allocated, deallocated), payload_after) = match marker {
k if k == icu_provider::hello_world::HelloWorldV1::INFO => {
let deserialized: DataPayload<icu_provider::hello_world::HelloWorldV1> = buffer_payload.into_deserialized(icu_provider::buf::BufferFormat::Postcard1).unwrap();
(MeasuringAllocator::end_measure(), UpcastDataPayload::upcast(deserialized))
}
$(
k if k == <$marker_ty>::INFO => {
let deserialized: DataPayload<$marker_ty> = buffer_payload.into_deserialized(icu_provider::buf::BufferFormat::Postcard1).unwrap();
(MeasuringAllocator::end_measure(), UpcastDataPayload::upcast(deserialized))
}
)+
$(
k if k == <$emarker_ty>::INFO => {
let deserialized: DataPayload<$emarker_ty> = buffer_payload.into_deserialized(icu_provider::buf::BufferFormat::Postcard1).unwrap();
(MeasuringAllocator::end_measure(), UpcastDataPayload::upcast(deserialized))
}
)+
_ => unreachable!("unregistered marker {marker:?}")
};
}
}
icu_provider_registry::registry!(cb);
if payload_before != &payload_after {
self.rountrip_errors
.lock()
.expect("poison")
.entry(marker)
.or_default()
.insert(
id.locale.to_string()
+ if id.marker_attributes.is_empty() {
""
} else {
"-x"
}
+ id.marker_attributes.as_str(),
);
}
if deallocated != allocated {
if !EXPECTED_VIOLATIONS.contains(&marker) {
eprintln!("Zerocopy violation {marker:?} {id:?}: {allocated}B allocated, {deallocated}B deallocated");
}
self.zero_copy_violations
.lock()
.expect("poison")
.insert(marker);
} else if allocated > 0 {
if !EXPECTED_TRANSIENT_VIOLATIONS.contains(&marker) {
eprintln!("Transient zerocopy violation {marker:?} {id:?}: {allocated}B allocated/deallocated");
}
self.zero_copy_transient_violations
.lock()
.expect("poison")
.insert(marker);
}
Ok(())
}
fn close(&mut self) -> Result<ExporterCloseMetadata, DataError> {
let rountrip_errors = self.rountrip_errors.get_mut().expect("poison");
rountrip_errors.remove(&icu::datetime::provider::names::DatetimeNamesMonthChineseV1::INFO);
rountrip_errors.remove(&icu::datetime::provider::names::DatetimeNamesMonthDangiV1::INFO);
rountrip_errors.remove(&icu::datetime::provider::names::DatetimeNamesMonthHebrewV1::INFO);
rountrip_errors.remove(&icu::datetime::provider::names::DatetimeNamesYearJapaneseV1::INFO);
assert_eq!(rountrip_errors, &mut BTreeMap::default());
let violations = self
.zero_copy_violations
.get_mut()
.expect("poison")
.iter()
.copied()
.collect::<Vec<_>>();
let transient_violations = self
.zero_copy_transient_violations
.get_mut()
.expect("poison")
.iter()
.copied()
.collect::<Vec<_>>();
assert!(transient_violations == EXPECTED_TRANSIENT_VIOLATIONS && violations == EXPECTED_VIOLATIONS,
"Expected violations list does not match found violations!\n\
If the new list is smaller, please update EXPECTED_VIOLATIONS in make-testdata.rs\n\
If it is bigger and that was unexpected, please make sure the marker remains zero-copy, or ask ICU4X team members if it is okay \
to temporarily allow for this marker to be allowlisted.\n\
Common cause: did you forget to add `serde(borrow)` to all of the fields in your data struct?\n\
Expected:\n{EXPECTED_VIOLATIONS:?}\nFound:\n{violations:?}\nExpected (transient):\n{EXPECTED_TRANSIENT_VIOLATIONS:?}\nFound (transient):\n{transient_violations:?}"
);
Ok(Default::default())
}
}
/// Installs [`MeasuringAllocator`] as the global allocator of this binary so that heap
/// traffic can be measured on demand.
#[global_allocator]
static ALLOCATOR: MeasuringAllocator = MeasuringAllocator;

/// Allocator that forwards every request to [`System`] and, while a measurement is active
/// on the calling thread, adds up the sizes passed to `alloc` and `dealloc` on that thread.
struct MeasuringAllocator;

impl MeasuringAllocator {
    thread_local! {
        // Whether allocations on this thread are currently being counted.
        static ACTIVE: Cell<bool> = const { Cell::new(false) };
        // Sum of the layout sizes passed to `alloc` on this thread while counting.
        static TOTAL_ALLOCATED: Cell<u64> = const { Cell::new(0) };
        // Sum of the layout sizes passed to `dealloc` on this thread while counting.
        static TOTAL_DEALLOCATED: Cell<u64> = const { Cell::new(0) };
    }

    /// Starts counting allocations and deallocations made on the current thread.
    pub fn start_measure() {
        Self::ACTIVE.with(|flag| flag.set(true));
    }

    /// Stops counting on the current thread and returns `(allocated, deallocated)` in
    /// bytes, resetting both totals to zero.
    pub fn end_measure() -> (u64, u64) {
        Self::ACTIVE.with(|flag| flag.set(false));
        let allocated = Self::TOTAL_ALLOCATED.with(|total| total.replace(0));
        let deallocated = Self::TOTAL_DEALLOCATED.with(|total| total.replace(0));
        (allocated, deallocated)
    }

    /// Reports whether counting is currently switched on for the current thread.
    fn is_measuring() -> bool {
        Self::ACTIVE.with(|flag| flag.get())
    }
}

// SAFETY: every request is forwarded unchanged to `System`; the added bookkeeping only
// reads and writes thread-local `Cell`s.
unsafe impl GlobalAlloc for MeasuringAllocator {
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        if Self::is_measuring() {
            let requested = layout.size() as u64;
            Self::TOTAL_ALLOCATED.with(|total| total.set(total.get() + requested));
        }
        // SAFETY: the caller upholds the contract of `GlobalAlloc::alloc` for `layout`,
        // which is passed through untouched.
        unsafe { System.alloc(layout) }
    }

    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        if Self::is_measuring() {
            let released = layout.size() as u64;
            Self::TOTAL_DEALLOCATED.with(|total| total.set(total.get() + released));
        }
        // SAFETY: `ptr` was handed out by `System` through `alloc` above, and the caller
        // guarantees that `layout` is the layout it was allocated with.
        unsafe { System.dealloc(ptr, layout) }
    }
}