1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
//! Determining the structure of a set of ruleset names.
//!
//! The names of time zones in the zoneinfo database are of the form
//! `Area/Location`, or more rarely, `Area/Location/Sublocation`. This means
//! they form a hierarchy, with each level either serving as a time zone
//! itself (usually a location) or as a parent of multiple other entries
//! (usually an area).
//!
//! When generating Rust code containing the timezone data, we need to
//! generate the entire tree structure, not just the leaves of actual timezone
//! data. This module determines that structure, allowing it to be created
//! before any actual timezone data is written.
//!
//! For example, say we have the following subset of time zone entries:
//!
//! - America/Antigua
//! - America/Araguaina
//! - America/Argentina/Buenos_Aires
//! - America/Argentina/Catamarca
//! - America/Argentina/Cordoba
//! - America/Aruba
//!
//! On top of the six actual time zone files, we would need to create the following:
//!
//! - An America module that has three private submodules (Antigua, Araguaína,
//!   and Aruba) and one public submodule (Argentina);
//! - An America/Argentina submodule that has there private submodules (Buenos
//!   Aires, Catamarca, Cordoba).
//!
//! This module contains an iterator that finds all parent zonesets, and
//! sorts them so they’re output in a correct order.

use std::collections::{BTreeMap, BTreeSet};

use table::Table;

/// Trait to put the `structure` method on Tables.
pub trait Structure {
    /// Returns an iterator over the structure of this table.
    fn structure(&self) -> TableStructure;
}

impl Structure for Table {
    fn structure(&self) -> TableStructure {
        let mut mappings = BTreeMap::new();

        for key in self.zonesets.keys().chain(self.links.keys()) {
            // Extract the name from the *last* slash. So
            // `America/Kentucky/Louisville` is split into
            // `America/Kentucky` and `Louisville` components.
            let last_slash = match key.rfind('/') {
                Some(pos) => pos,
                None => continue,
            };

            // Split the string around the slash, which gets removed.
            let parent = &key[..last_slash];
            {
                let set = mappings.entry(parent).or_insert_with(BTreeSet::new);
                set.insert(Child::TimeZone(&key[last_slash + 1..]));
            }

            // If the *parent* name still has a slash in it, then this is
            // a time zone of the form `America/Kentucky/Louisville`. We
            // need to make sure that `America` now has a `Kentucky`
            // child, too.
            if let Some(first_slash) = parent.find('/') {
                let grandparent = &parent[..first_slash];
                let set = mappings.entry(grandparent).or_insert_with(BTreeSet::new);
                set.insert(Child::Submodule(&parent[first_slash + 1..]));
            }
        }

        TableStructure { mappings: mappings }
    }
}

/// The structure of a set of time zone names.
#[derive(PartialEq, Debug)]
pub struct TableStructure<'table> {
    mappings: BTreeMap<&'table str, BTreeSet<Child<'table>>>,
}

impl<'table> IntoIterator for TableStructure<'table> {
    type Item = TableStructureEntry<'table>;
    type IntoIter = Iter<'table>;

    fn into_iter(self) -> Self::IntoIter {
        // It’s necessary to sort the keys before producing them, to
        // ensure that (for example) `America` is produced before
        // `America/Kentucky`.
        let mut keys: Vec<_> = self.mappings.keys().cloned().collect();
        keys.sort_by(|a, b| b.cmp(a));

        Iter {
            structure: self,
            keys: keys,
        }
    }
}

/// Iterator over sorted entries in a `TableStructure`.
#[derive(PartialEq, Debug)]
pub struct Iter<'table> {
    structure: TableStructure<'table>,
    keys: Vec<&'table str>,
}

impl<'table> Iterator for Iter<'table> {
    type Item = TableStructureEntry<'table>;

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let key = match self.keys.pop() {
                Some(k) => k,
                None => return None,
            };

            // Move the strings out into an (automatically-sorted) vector.
            let values = self.structure.mappings[key].iter().cloned().collect();

            return Some(TableStructureEntry {
                name: key,
                children: values,
            });
        }
    }
}

/// An entry returned from a `TableStructure` iterator.
#[derive(PartialEq, Debug)]
pub struct TableStructureEntry<'table> {
    /// This entry’s name, which *can* still include slashes.
    pub name: &'table str,

    /// A vector of sorted child names, which should have no slashes in.
    pub children: Vec<Child<'table>>,
}

/// A child module that needs to be created.
///
/// The order here is important for `PartialOrd`: submodules need to be
/// created before actual time zones, as directories need to be created
/// before the files in them can be written.
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Copy, Clone)]
pub enum Child<'table> {
    /// A module containing **only** submodules, no time zones.
    Submodule(&'table str),

    /// A module containing **only** the details of a time zone.
    TimeZone(&'table str),
}

#[cfg(test)]
#[allow(unused_results)]
mod test {
    use super::*;
    use table::Table;

    #[test]
    fn empty() {
        let table = Table::default();
        let mut structure = table.structure().into_iter();
        assert_eq!(structure.next(), None);
    }

    #[test]
    fn separate() {
        let mut table = Table::default();
        table.zonesets.insert("a".to_owned(), Vec::new());
        table.zonesets.insert("b".to_owned(), Vec::new());
        table.zonesets.insert("c".to_owned(), Vec::new());

        let mut structure = table.structure().into_iter();
        assert_eq!(structure.next(), None);
    }

    #[test]
    fn child() {
        let mut table = Table::default();
        table.zonesets.insert("a/b".to_owned(), Vec::new());

        let mut structure = table.structure().into_iter();
        assert_eq!(
            structure.next(),
            Some(TableStructureEntry {
                name: &"a".to_owned(),
                children: vec![Child::TimeZone("b")]
            })
        );
        assert_eq!(structure.next(), None);
    }

    #[test]
    fn hierarchy() {
        let mut table = Table::default();
        table.zonesets.insert("a/b/c".to_owned(), Vec::new());
        table.zonesets.insert("a/b/d".to_owned(), Vec::new());
        table.zonesets.insert("a/e".to_owned(), Vec::new());

        let mut structure = table.structure().into_iter();
        assert_eq!(
            structure.next(),
            Some(TableStructureEntry {
                name: &"a".to_owned(),
                children: vec![Child::Submodule("b"), Child::TimeZone("e")]
            })
        );
        assert_eq!(
            structure.next(),
            Some(TableStructureEntry {
                name: &"a/b".to_owned(),
                children: vec![Child::TimeZone("c"), Child::TimeZone("d")]
            })
        );
        assert_eq!(structure.next(), None);
    }
}