chrono_tz_build/
lib.rs

1extern crate parse_zoneinfo;
2#[cfg(feature = "filter-by-regex")]
3extern crate regex;
4
5use std::collections::BTreeSet;
6use std::env;
7use std::fs::File;
8use std::io::{self, BufRead, BufReader, Write};
9use std::path::{Path, PathBuf};
10
11use parse_zoneinfo::line::Line;
12use parse_zoneinfo::structure::{Child, Structure};
13use parse_zoneinfo::table::{Table, TableBuilder};
14use parse_zoneinfo::transitions::FixedTimespan;
15use parse_zoneinfo::transitions::TableTransitions;
16use parse_zoneinfo::FILES;
17
/// The name of the environment variable which possibly holds the filter regex.
///
/// Only available with the `filter-by-regex` feature. The `filter` module
/// reads this variable; when it is unset, or set to a value that is empty
/// after trimming whitespace, no filtering is applied.
#[cfg(feature = "filter-by-regex")]
pub const FILTER_ENV_VAR_NAME: &str = "CHRONO_TZ_TIMEZONE_FILTER";
21
// Drop everything from the first '#' to the end of the line.
//
// Needed until parse_zoneinfo handles comments itself. Strictly speaking a
// '#' inside double quotes is not a comment and should be left alone, but
// the tz database does not currently contain such a line, so the simple
// rule is sufficient.
fn strip_comments(mut line: String) -> String {
    // With no '#' present the cut point is the full length, which makes the
    // truncate a no-op.
    let cut = line.find('#').unwrap_or(line.len());
    line.truncate(cut);
    line
}
32
33// Generate a list of the time zone periods beyond the first that apply
34// to this zone, as a string representation of a static slice.
35fn format_rest(rest: Vec<(i64, FixedTimespan)>) -> String {
36    let mut ret = "&[\n".to_string();
37    for (
38        start,
39        FixedTimespan {
40            utc_offset,
41            dst_offset,
42            name,
43        },
44    ) in rest
45    {
46        let timespan_name = match name.as_ref() {
47            "%z" => None,
48            name => Some(name),
49        };
50        ret.push_str(&format!(
51            "                    ({start}, FixedTimespan {{ \
52             utc_offset: {utc_offset}, dst_offset: {dst_offset}, name: {timespan_name:?} \
53             }}),\n",
54        ));
55    }
56    ret.push_str("                ]");
57    ret
58}
59
// Turn a tz database name into something usable as a Rust identifier:
//   * every '/' becomes "__"
//   * every '+' becomes "Plus"
//   * '-' handling is decided by the FIRST '-' in the name: if it is
//     directly followed by a numeric character (a negative offset such as
//     "GMT-5"), every '-' becomes "Minus"; otherwise it is treated as a
//     hyphen and every '-' is removed.
fn convert_bad_chars(name: &str) -> String {
    let sanitized = name.replace('/', "__").replace('+', "Plus");

    // None: no '-' at all. Some(true): the first '-' precedes a numeric char.
    let digit_follows = sanitized
        .split_once('-')
        .map(|(_, tail)| tail.chars().next().map_or(false, char::is_numeric));

    match digit_follows {
        None => sanitized,
        Some(true) => sanitized.replace('-', "Minus"),
        Some(false) => sanitized.replace('-', ""),
    }
}
80
81// The timezone file contains impls of `Timespans` for all timezones in the
82// database. The `Wrap` wrapper in the `timezone_impl` module then implements
83// TimeZone for any contained struct that implements `Timespans`.
84fn write_timezone_file(timezone_file: &mut File, table: &Table, uncased: bool) -> io::Result<()> {
85    let zones = table
86        .zonesets
87        .keys()
88        .chain(table.links.keys())
89        .collect::<BTreeSet<_>>();
90    writeln!(
91        timezone_file,
92        "use core::fmt::{{self, Debug, Display, Formatter}};",
93    )?;
94    writeln!(timezone_file, "use core::str::FromStr;\n",)?;
95    writeln!(
96        timezone_file,
97        "use crate::timezone_impl::{{TimeSpans, FixedTimespanSet, FixedTimespan}};\n",
98    )?;
99    writeln!(
100        timezone_file,
101        "/// TimeZones built at compile time from the tz database
102///
103/// This implements [`chrono::TimeZone`] so that it may be used in and to
104/// construct chrono's DateTime type. See the root module documentation
105/// for details."
106    )?;
107    writeln!(timezone_file, "#[derive(Clone, Copy, PartialEq, Eq, Hash)]")?;
108    writeln!(
109        timezone_file,
110        r#"#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]"#
111    )?;
112    writeln!(timezone_file, "pub enum Tz {{")?;
113    for zone in &zones {
114        let zone_name = convert_bad_chars(zone);
115        writeln!(timezone_file, "    /// {zone}\n    {zone_name},")?;
116    }
117    writeln!(timezone_file, "}}")?;
118
119    let mut map = phf_codegen::Map::new();
120    for zone in &zones {
121        map.entry(zone, format!("Tz::{}", convert_bad_chars(zone)));
122    }
123    writeln!(
124        timezone_file,
125        "static TIMEZONES: ::phf::Map<&'static str, Tz> = \n{};",
126        map.build()
127    )?;
128
129    #[cfg(feature = "case-insensitive")]
130    if uncased {
131        writeln!(timezone_file, "use uncased::UncasedStr;\n",)?;
132        let mut map = phf_codegen::Map::new();
133        for zone in &zones {
134            map.entry(
135                uncased::UncasedStr::new(zone),
136                format!("Tz::{}", convert_bad_chars(zone)),
137            );
138        }
139        writeln!(
140            timezone_file,
141            "static TIMEZONES_UNCASED: ::phf::Map<&'static uncased::UncasedStr, Tz> = \n{};",
142            map.build()
143        )?;
144    }
145
146    writeln!(
147        timezone_file,
148        r#"#[derive(Copy, Clone, Debug, PartialEq, Eq)]
149pub struct ParseError(());
150
151impl Display for ParseError {{
152    fn fmt(&self, f: &mut Formatter) -> fmt::Result {{
153        f.write_str("failed to parse timezone")
154    }}
155}}
156
157#[cfg(feature = "std")]
158impl std::error::Error for ParseError {{}}
159
160impl FromStr for Tz {{
161    type Err = ParseError;
162    fn from_str(s: &str) -> Result<Self, Self::Err> {{
163        TIMEZONES.get(s).cloned().ok_or(ParseError(()))
164    }}
165}}
166"#
167    )?;
168
169    writeln!(
170        timezone_file,
171        "impl Tz {{
172    pub fn name(self) -> &'static str {{
173        match self {{"
174    )?;
175    for zone in &zones {
176        let zone_name = convert_bad_chars(zone);
177        writeln!(timezone_file, "            Tz::{zone_name} => \"{zone}\",")?;
178    }
179    writeln!(
180        timezone_file,
181        "        }}
182    }}"
183    )?;
184
185    if uncased {
186        writeln!(
187            timezone_file,
188            r#"
189    #[cfg(feature = "case-insensitive")]
190    /// Parses a timezone string in a case-insensitive way
191    pub fn from_str_insensitive(s: &str) -> Result<Self, ParseError> {{
192        return TIMEZONES_UNCASED.get(s.into()).cloned().ok_or(ParseError(()));
193    }}"#
194        )?;
195    }
196
197    writeln!(timezone_file, "}}")?;
198
199    writeln!(
200        timezone_file,
201        "impl Debug for Tz {{
202    fn fmt(&self, f: &mut Formatter) -> fmt::Result {{
203        f.write_str(self.name().as_ref())
204    }}
205}}\n"
206    )?;
207    writeln!(
208        timezone_file,
209        "impl Display for Tz {{
210    fn fmt(&self, f: &mut Formatter) -> fmt::Result {{
211        f.write_str(self.name().as_ref())
212    }}
213}}\n"
214    )?;
215    writeln!(
216        timezone_file,
217        "impl TimeSpans for Tz {{
218    fn timespans(&self) -> FixedTimespanSet {{
219        match *self {{"
220    )?;
221    for zone in &zones {
222        let timespans = table.timespans(zone).unwrap();
223        let zone_name = convert_bad_chars(zone);
224        let timespan_name = match timespans.first.name.as_ref() {
225            "%z" => None,
226            name => Some(name),
227        };
228        writeln!(
229            timezone_file,
230            "            Tz::{zone} => {{
231                const REST: &[(i64, FixedTimespan)] = {rest};
232                FixedTimespanSet {{
233                    first: FixedTimespan {{
234                        utc_offset: {utc},
235                        dst_offset: {dst},
236                        name: {name:?},
237                    }},
238                    rest: REST
239                }}
240            }},\n",
241            zone = zone_name,
242            rest = format_rest(timespans.rest),
243            utc = timespans.first.utc_offset,
244            dst = timespans.first.dst_offset,
245            name = timespan_name,
246        )?;
247    }
248    write!(
249        timezone_file,
250        "         }}
251    }}
252}}\n"
253    )?;
254    write!(
255        timezone_file,
256        "/// An array of every known variant
257///
258/// Useful for iterating over known timezones:
259///
260/// ```
261/// use chrono_tz::{{TZ_VARIANTS, Tz}};
262/// assert!(TZ_VARIANTS.iter().any(|v| *v == Tz::UTC));
263/// ```
264pub static TZ_VARIANTS: [Tz; {num}] = [
265",
266        num = zones.len()
267    )?;
268    for zone in &zones {
269        writeln!(
270            timezone_file,
271            "    Tz::{zone},",
272            zone = convert_bad_chars(zone)
273        )?;
274    }
275    write!(timezone_file, "];")?;
276    Ok(())
277}
278
279// Create a file containing nice-looking re-exports such as Europe::London
280// instead of having to use chrono_tz::timezones::Europe__London
281fn write_directory_file(directory_file: &mut File, table: &Table, version: &str) -> io::Result<()> {
282    // expose the underlying IANA TZDB version
283    writeln!(
284        directory_file,
285        "pub const IANA_TZDB_VERSION: &str = \"{version}\";\n"
286    )?;
287    // add the `loose' zone definitions first
288    writeln!(directory_file, "use crate::timezones::Tz;\n")?;
289    let zones = table
290        .zonesets
291        .keys()
292        .chain(table.links.keys())
293        .filter(|zone| !zone.contains('/'))
294        .collect::<BTreeSet<_>>();
295    for zone in zones {
296        let zone = convert_bad_chars(zone);
297        writeln!(directory_file, "pub const {zone}: Tz = Tz::{zone};")?;
298    }
299    writeln!(directory_file)?;
300
301    // now add the `structured' zone names in submodules
302    let mut first = true;
303    for entry in table.structure() {
304        if entry.name.contains('/') {
305            continue;
306        }
307
308        match first {
309            true => first = false,
310            false => writeln!(directory_file, "")?,
311        }
312
313        let module_name = convert_bad_chars(entry.name);
314        writeln!(directory_file, "pub mod {module_name} {{")?;
315        writeln!(directory_file, "    use crate::timezones::Tz;\n",)?;
316        for child in entry.children {
317            let name = match child {
318                Child::Submodule(name) => name,
319                Child::TimeZone(name) => {
320                    let name = convert_bad_chars(name);
321                    writeln!(
322                        directory_file,
323                        "    pub const {name}: Tz = Tz::{module_name}__{name};"
324                    )?;
325                    continue;
326                }
327            };
328
329            let submodule_name = convert_bad_chars(name);
330            writeln!(directory_file, "    pub mod {submodule_name} {{")?;
331            writeln!(directory_file, "        use crate::timezones::Tz;\n",)?;
332            let full_name = entry.name.to_string() + "/" + name;
333            for entry in table.structure() {
334                if entry.name != full_name {
335                    continue;
336                }
337
338                for child in entry.children {
339                    let name = match child {
340                        Child::Submodule(_) => {
341                            panic!("Depth of > 3 nested submodules not implemented!")
342                        }
343                        Child::TimeZone(name) => name,
344                    };
345
346                    let converted_name = convert_bad_chars(name);
347                    writeln!(directory_file,
348                        "        pub const {converted_name}: Tz = Tz::{module_name}__{submodule_name}__{converted_name};",
349                    )?;
350                }
351            }
352            writeln!(directory_file, "    }}\n")?;
353        }
354        writeln!(directory_file, "}}")?;
355    }
356
357    Ok(())
358}
359
/// Support code for the `filter-by-regex` feature.
///
/// Links named "GMT" and "UTC" (and their targets) are always kept,
/// whatever the regex says.
#[cfg(feature = "filter-by-regex")]
mod filter {
    use std::collections::HashSet;
    use std::env;

    use regex::Regex;

    use crate::{Table, FILTER_ENV_VAR_NAME};

    /// Apply the regex from [`FILTER_ENV_VAR_NAME`] to `table`, if one is set.
    /// Without a usable regex the table is left untouched.
    pub(crate) fn maybe_filter_timezone_table(table: &mut Table) {
        if let Some(regex) = get_filter_regex() {
            filter_timezone_table(table, regex);
        }
    }

    /// Read the `CHRONO_TZ_TIMEZONE_FILTER` environment variable and compile
    /// it into a regex.
    ///
    /// Returns `None` when the variable is absent or blank after trimming.
    /// Panics when the value is not valid Unicode or not a valid regex: a
    /// broken filter should fail the build rather than be ignored.
    fn get_filter_regex() -> Option<Regex> {
        let raw = match env::var(FILTER_ENV_VAR_NAME) {
            Ok(raw) => raw,
            Err(env::VarError::NotPresent) => return None,
            Err(env::VarError::NotUnicode(s)) => panic!(
                "The value '{s:?}' for environment variable {FILTER_ENV_VAR_NAME} is not valid Unicode"
            ),
        };

        let val = raw.trim();
        if val.is_empty() {
            return None;
        }

        let regex = Regex::new(val).unwrap_or_else(|err| {
            panic!(
                "The value '{val:?}' for environment variable {FILTER_ENV_VAR_NAME} is not a valid regex, err={err}"
            )
        });
        Some(regex)
    }

    /// Record `new_value` as a name to keep.
    ///
    /// A name made of exactly three '/'-separated parts additionally records
    /// its two-part prefix; otherwise half of Indiana would vanish from
    /// `directory.rs`. One-part prefixes are deliberately *not* recorded,
    /// since something like 'America' would pull in far too much.
    fn insert_keep_entry(keep: &mut HashSet<String>, new_value: &str) {
        let segments: Vec<&str> = new_value.split('/').collect();
        if let [p1, p2, _] = segments.as_slice() {
            keep.insert(format!("{p1}/{p2}"));
        }

        keep.insert(new_value.to_string());
    }

    /// Reduce `table` to the zones and links selected by `filter_regex`.
    fn filter_timezone_table(table: &mut Table, filter_regex: Regex) {
        // Build the transitive closure of names to keep. Filtering `zonesets`
        // and `links` by the regex alone would not keep `structure()` intact.
        let mut keep = HashSet::new();

        // Seed: link names / targets that match, plus the GMT and UTC links.
        for (k, v) in &table.links {
            let forced = k == "GMT" || k == "UTC";
            if forced || filter_regex.is_match(k) {
                insert_keep_entry(&mut keep, k);
            }
            if forced || filter_regex.is_match(v) {
                insert_keep_entry(&mut keep, v);
            }
        }

        // Closure: a kept side of a link keeps the other side too. Repeat
        // until a pass adds nothing, giving up after 50 passes.
        let mut converged = false;
        for _ in 0..50 {
            let before = keep.len();

            for (k, v) in &table.links {
                match (keep.contains(k), keep.contains(v)) {
                    (true, false) => insert_keep_entry(&mut keep, v),
                    (false, true) => insert_keep_entry(&mut keep, k),
                    _ => {}
                }
            }

            if keep.len() == before {
                converged = true;
                break;
            }
        }
        if !converged {
            println!("cargo:warning=Recursion limit reached while building filter list");
        }

        // Actually do the filtering.
        table
            .links
            .retain(|k, v| keep.contains(k) || keep.contains(v));

        // A zone survives if it matches the regex itself or starts with any
        // kept name (which includes the recorded two-part prefixes).
        table
            .zonesets
            .retain(|k, _| filter_regex.is_match(k) || keep.iter().any(|s| k.starts_with(s)));
    }
}
468
// Determine the IANA tz database version from `tz/NEWS` under
// `CARGO_MANIFEST_DIR`.
//
// Returns the `<version>` part of the first line of the form
// `Release <version> - <anything>`. Lines that start with "Release " but
// contain no " - " separator are skipped.
//
// Panics (failing the build) when `CARGO_MANIFEST_DIR` is not set, when the
// file cannot be opened or read, or when no release line is found.
fn detect_iana_db_version() -> String {
    let root = env::var("CARGO_MANIFEST_DIR").expect("no Cargo build context");
    let path = Path::new(&root).join("tz/NEWS");
    let file = File::open(&path)
        .unwrap_or_else(|e| panic!("failed to open file {}: {}", path.display(), e));

    for line in BufReader::new(file).lines() {
        // Report a read error (I/O failure, invalid UTF-8) for what it is
        // instead of silently stopping the scan.
        let line = line.unwrap_or_else(|e| panic!("failed to read {}: {}", path.display(), e));

        let release = line
            .strip_prefix("Release ")
            .and_then(|rest| rest.split_once(" - "));
        if let Some((version, _)) = release {
            return version.to_owned();
        }
    }

    // Reachable whenever the file has no release line, so this is an
    // ordinary failure rather than an `unreachable!` invariant.
    panic!("no version found in {}", path.display())
}
489
490pub fn main(dir: &Path, _filter: bool, _uncased: bool) {
491    let mut table = TableBuilder::new();
492
493    let root = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| String::new()));
494    for fname in FILES {
495        let path = root.join(format!("tz/{fname}"));
496        let file =
497            File::open(&path).unwrap_or_else(|e| panic!("cannot open {}: {e}", path.display()));
498        for line in BufReader::new(file).lines() {
499            let line = strip_comments(line.unwrap());
500            table.add_line(Line::new(&line).unwrap()).unwrap();
501        }
502    }
503
504    #[allow(unused_mut)]
505    let mut table = table.build();
506    #[cfg(feature = "filter-by-regex")]
507    if _filter {
508        filter::maybe_filter_timezone_table(&mut table);
509    }
510
511    let timezone_path = dir.join("timezones.rs");
512    let mut timezone_file = File::create(timezone_path).unwrap();
513    write_timezone_file(&mut timezone_file, &table, _uncased).unwrap();
514
515    let directory_path = dir.join("directory.rs");
516    let mut directory_file = File::create(directory_path).unwrap();
517    let version = detect_iana_db_version();
518    write_directory_file(&mut directory_file, &table, &version).unwrap();
519}