timezone_provider/
tzdb.rs

//! `timezone_provider` contains the core data provider implementations for `temporal_rs`.
2
3// What are we even doing here? Why are providers needed?
4//
5// Two core data sources need to be accounted for:
6//
7//   - IANA identifier normalization (hopefully, semi easy)
8//   - IANA TZif data (much harder)
9//
10
11use std::{
12    borrow::Cow,
13    collections::{BTreeMap, BTreeSet},
14    fs, io,
15    path::Path,
16};
17
18use parse_zoneinfo::{
19    line::{Line, LineParser},
20    table::{Table, TableBuilder},
21};
22use zerotrie::{ZeroAsciiIgnoreCaseTrie, ZeroTrieBuildError};
23use zerovec::{VarZeroVec, ZeroVec};
24
25/// A data struct for IANA identifier normalization
26#[derive(PartialEq, Debug, Clone, yoke::Yokeable, serde::Serialize, databake::Bake)]
27#[databake(path = timezone_provider)]
28#[derive(serde::Deserialize)]
29pub struct IanaIdentifierNormalizer<'data> {
30    /// TZDB version
31    pub version: Cow<'data, str>,
32    /// An index to the location of the normal identifier.
33    #[serde(borrow)]
34    pub available_id_index: ZeroAsciiIgnoreCaseTrie<ZeroVec<'data, u8>>,
35
36    /// The normalized IANA identifier
37    #[serde(borrow)]
38    pub normalized_identifiers: VarZeroVec<'data, str>,
39}
40
41// ==== End Data marker implementation ====
42
/// Names of the zone info source files, relative to the tzdata directory,
/// that are read and parsed when building the provider.
const ZONE_INFO_FILES: [&str; 9] = [
    "africa",
    "antarctica",
    "asia",
    "australasia",
    "backward",
    "etcetera",
    "europe",
    "northamerica",
    "southamerica",
];
54
55pub struct TzdbDataProvider {
56    version: String,
57    data: Table,
58}
59
60impl TzdbDataProvider {
61    pub fn new(tzdata: &Path) -> Result<Self, io::Error> {
62        let parser = LineParser::default();
63        let mut builder = TableBuilder::default();
64
65        let version_file = tzdata.join("version");
66        let version = fs::read_to_string(version_file)?.trim().into();
67
68        for filename in ZONE_INFO_FILES {
69            let file_path = tzdata.join(filename);
70            let file = fs::read_to_string(file_path)?;
71
72            for line in file.lines() {
73                match parser.parse_str(line) {
74                    Ok(Line::Zone(zone)) => builder.add_zone_line(zone).unwrap(),
75                    Ok(Line::Continuation(cont)) => builder.add_continuation_line(cont).unwrap(),
76                    Ok(Line::Rule(rule)) => builder.add_rule_line(rule).unwrap(),
77                    Ok(Line::Link(link)) => builder.add_link_line(link).unwrap(),
78                    Ok(Line::Space) => {}
79                    Err(e) => eprintln!("{e}"),
80                }
81            }
82        }
83
84        Ok(Self {
85            version,
86            data: builder.build(),
87        })
88    }
89}
90
91// ==== Begin DataProvider impl ====
92
93#[derive(Debug)]
94pub enum IanaDataError {
95    Io(io::Error),
96    Build(ZeroTrieBuildError),
97}
98
99impl IanaIdentifierNormalizer<'_> {
100    pub fn build(tzdata: &Path) -> Result<Self, IanaDataError> {
101        let provider = TzdbDataProvider::new(tzdata).unwrap();
102        let mut identifiers = BTreeSet::default();
103        for zoneset_id in provider.data.zonesets.keys() {
104            // Add canonical identifiers.
105            let _ = identifiers.insert(zoneset_id.clone());
106        }
107        for links in provider.data.links.keys() {
108            // Add link / non-canonical identifiers
109            let _ = identifiers.insert(links.clone());
110        }
111
112        // Create trie and bin search the index from Vec
113        let norm_vec: Vec<String> = identifiers.iter().cloned().collect();
114        let norm_zerovec: VarZeroVec<'static, str> = norm_vec.as_slice().into();
115
116        let identier_map: BTreeMap<Vec<u8>, usize> = identifiers
117            .iter()
118            .map(|id| {
119                (
120                    id.to_ascii_lowercase().as_bytes().to_vec(),
121                    norm_vec.binary_search(id).unwrap(),
122                )
123            })
124            .collect();
125
126        Ok(IanaIdentifierNormalizer {
127            version: provider.version.into(),
128            available_id_index: ZeroAsciiIgnoreCaseTrie::try_from(&identier_map)
129                .map_err(IanaDataError::Build)?
130                .convert_store(),
131            normalized_identifiers: norm_zerovec,
132        })
133    }
134}
135
136// ==== End DataProvider impl ====