Skip to main content

mlt_core/frames/v01/
tile.rs

1//! Row-oriented "source form" for the optimizer.
2//!
3//! [`TileLayer01`] holds one [`TileFeature`] per map feature, each owning
4//! its geometry as a [`geo_types::Geometry<i32>`] and its property values as a
5//! plain `Vec<PropValue>`.  This is the working form used throughout the
6//! optimizer and sorting pipeline: it is cheap to clone, trivially sortable,
7//! and free from any encoded/decoded duality.
8//!
9//! The only conversion from [`TileLayer01`] to [`StagedLayer01`] is [`From`] at the
10//! optimizer exit boundary; there is no encoded→decoded conversion from Staged back to Tile.
11
12use crate::errors::AsMltError as _;
13use crate::v01::{
14    GeometryValues, IdValues, Layer01, ParsedProperty, PropValue, StagedLayer01, StagedProperty,
15    StagedScalar, StagedSharedDict, StagedStrings, TileFeature, TileLayer01,
16    build_staged_shared_dict,
17};
18use crate::{Decoder, MltError, MltResult};
19
20// ── Layer01 → TileLayer01 ────────────────────────────────────────────────────
21
22impl Layer01<'_> {
23    /// Decode and convert into a row-oriented [`TileLayer01`], charging every
24    /// heap allocation against `dec`.
25    ///
26    /// Callers do not need to pre-call `decode_all` on the source layer.
27    pub fn into_tile(self, dec: &mut Decoder) -> MltResult<TileLayer01> {
28        let id = self.id.map(|id| id.into_parsed(dec)).transpose()?;
29        let geometry = self.geometry.into_parsed(dec)?;
30        let properties: Vec<ParsedProperty<'_>> = self
31            .properties
32            .into_iter()
33            .map(|p| p.into_parsed(dec))
34            .collect::<Result<Vec<_>, _>>()?;
35
36        let n = geometry.vector_types.len();
37
38        let mut property_names: Vec<String> = Vec::new();
39        for prop in &properties {
40            match prop {
41                ParsedProperty::SharedDict(sd) => {
42                    for item in &sd.items {
43                        property_names.push(format!("{}{}", sd.prefix, item.suffix));
44                    }
45                }
46                other => property_names.push(other.name().to_string()),
47            }
48        }
49
50        let ids: Option<&[Option<u64>]> = id.as_ref().map(|d| d.0.as_slice());
51
52        let mut features = dec.alloc::<TileFeature>(n)?;
53        for i in 0..n {
54            let feat_id = ids.and_then(|ids| ids.get(i).copied().flatten());
55            let geom = geometry.to_geojson(i)?;
56            let mut values = dec.alloc::<PropValue>(property_names.len())?;
57            for prop in &properties {
58                extract_parsed_values(prop, i, &mut values);
59            }
60
61            // Charge owned String bytes inside PropValue::Str.
62            charge_str_props(dec, &values)?;
63
64            features.push(TileFeature {
65                id: feat_id,
66                geometry: geom,
67                properties: values,
68            });
69        }
70
71        Ok(TileLayer01 {
72            name: self.name.to_string(),
73            extent: self.extent,
74            property_names,
75            features,
76        })
77    }
78}
79
80/// Extract the per-feature value at index `i` from a parsed property column
81/// and push it (or them, for `SharedDict`) into `out`.
82fn extract_parsed_values(prop: &ParsedProperty<'_>, i: usize, out: &mut Vec<PropValue>) {
83    use crate::v01::ParsedProperty as P;
84    match prop {
85        P::Bool(s) => out.push(PropValue::Bool(s.values[i])),
86        P::I8(s) => out.push(PropValue::I8(s.values[i])),
87        P::U8(s) => out.push(PropValue::U8(s.values[i])),
88        P::I32(s) => out.push(PropValue::I32(s.values[i])),
89        P::U32(s) => out.push(PropValue::U32(s.values[i])),
90        P::I64(s) => out.push(PropValue::I64(s.values[i])),
91        P::U64(s) => out.push(PropValue::U64(s.values[i])),
92        P::F32(s) => out.push(PropValue::F32(s.values[i])),
93        P::F64(s) => out.push(PropValue::F64(s.values[i])),
94        P::Str(s) => {
95            let val = s
96                .get(u32::try_from(i).unwrap_or(u32::MAX))
97                .map(str::to_string);
98            out.push(PropValue::Str(val));
99        }
100        P::SharedDict(sd) => {
101            for item in &sd.items {
102                let val = item.get(sd, i).map(str::to_string);
103                out.push(PropValue::Str(val));
104            }
105        }
106    }
107}
108
109// ── TileLayer01 → StagedLayer01 ─────────────────────────────────────────────
110
111/// FIXME: this should be part of the [`crate::v01::optimizer::Tile01Encoder::encode`]
112///   `rebuild_properties` would use proper shared dict grouping settings
113impl From<TileLayer01> for StagedLayer01 {
114    fn from(source: TileLayer01) -> Self {
115        // Rebuild geometry column
116        let mut geometry = GeometryValues::default();
117        for f in &source.features {
118            geometry.push_geom(&f.geometry);
119        }
120
121        let id = if source.features.iter().any(|f| f.id.is_some()) {
122            Some(IdValues(source.features.iter().map(|f| f.id).collect()))
123        } else {
124            None
125        };
126
127        let num_cols = source.property_names.len();
128        let properties = rebuild_properties(&source.property_names, &source.features, num_cols);
129
130        StagedLayer01 {
131            name: source.name,
132            extent: source.extent,
133            id,
134            geometry,
135            properties,
136        }
137    }
138}
139
140/// Rebuild the property columns from per-feature `PropValue` rows.
141///
142/// Each column index `c` maps to a column name in `property_names[c]`.
143/// A `SharedDict` column is detected by two or more consecutive names sharing
144/// the same `"prefix:"` portion.  All other columns become scalar columns.
145fn rebuild_properties(
146    names: &[String],
147    features: &[TileFeature],
148    num_cols: usize,
149) -> Vec<StagedProperty> {
150    if num_cols == 0 {
151        return Vec::new();
152    }
153
154    let mut result = Vec::new();
155    let mut col = 0;
156
157    while col < num_cols {
158        // Check if the next column(s) form a SharedDict group (share the same prefix).
159        let (prefix, _suffix) = split_prefix(&names[col]);
160
161        if let Some(dict_prefix) = prefix {
162            let start_col = col;
163            let mut group_end = col + 1;
164            while group_end < num_cols {
165                let (p, _) = split_prefix(&names[group_end]);
166                if p == Some(dict_prefix) {
167                    group_end += 1;
168                } else {
169                    break;
170                }
171            }
172
173            if group_end > start_col + 1 {
174                // Multiple columns with the same prefix → SharedDict
175                let shared_dict =
176                    rebuild_shared_dict(dict_prefix, names, features, start_col, group_end);
177                result.push(StagedProperty::SharedDict(shared_dict));
178                col = group_end;
179                continue;
180            }
181        }
182
183        // Single scalar column
184        let prop = rebuild_scalar_column(&names[col], col, features);
185        result.push(prop);
186        col += 1;
187    }
188
189    result
190}
191
/// Split `"prefix:suffix"` into `(Some("prefix"), "suffix")`, or
/// `(None, name)` if there is no colon.
///
/// Only the first colon separates the two parts, and the colon itself is
/// dropped: `"a:b:c"` yields `(Some("a"), "b:c")`.
fn split_prefix(name: &str) -> (Option<&str>, &str) {
    match name.split_once(':') {
        Some((prefix, suffix)) => (Some(prefix), suffix),
        None => (None, name),
    }
}
201
202fn rebuild_scalar_column(name: &str, col: usize, features: &[TileFeature]) -> StagedProperty {
203    // Determine the variant by looking at the first non-None feature value.
204    // Fall back to Str if all values are None or the column is empty.
205    let first_val = features.iter().find_map(|f| f.properties.get(col));
206
207    macro_rules! scalar_col {
208        ($variant:ident, $ty:ty, $sv:ident) => {{
209            let values: Vec<Option<$ty>> = features
210                .iter()
211                .map(|f| {
212                    if let Some(PropValue::$sv(v)) = f.properties.get(col) {
213                        *v
214                    } else {
215                        None
216                    }
217                })
218                .collect();
219            StagedProperty::$variant(StagedScalar {
220                name: name.to_string(),
221                values,
222            })
223        }};
224    }
225
226    match first_val {
227        Some(PropValue::Bool(_)) => scalar_col!(Bool, bool, Bool),
228        Some(PropValue::I8(_)) => scalar_col!(I8, i8, I8),
229        Some(PropValue::U8(_)) => scalar_col!(U8, u8, U8),
230        Some(PropValue::I32(_)) => scalar_col!(I32, i32, I32),
231        Some(PropValue::U32(_)) => scalar_col!(U32, u32, U32),
232        Some(PropValue::I64(_)) => scalar_col!(I64, i64, I64),
233        Some(PropValue::U64(_)) => scalar_col!(U64, u64, U64),
234        Some(PropValue::F32(_)) => scalar_col!(F32, f32, F32),
235        Some(PropValue::F64(_)) => scalar_col!(F64, f64, F64),
236        Some(PropValue::Str(_)) | None => {
237            let values: Vec<Option<String>> = features
238                .iter()
239                .map(|f| {
240                    if let Some(PropValue::Str(v)) = f.properties.get(col) {
241                        v.clone()
242                    } else {
243                        None
244                    }
245                })
246                .collect();
247            let mut ds = StagedStrings::from(values);
248            ds.name = name.to_string();
249            StagedProperty::Str(ds)
250        }
251    }
252}
253
254fn rebuild_shared_dict(
255    prefix: &str,
256    names: &[String],
257    features: &[TileFeature],
258    start_col: usize,
259    end_col: usize,
260) -> StagedSharedDict {
261    // Build per-item (start,end) ranges from raw feature data, then
262    // call build_staged_shared_dict to deduplicate into a shared corpus.
263    let items_raw: Vec<(String, StagedStrings)> = (start_col..end_col)
264        .map(|c| {
265            let (_, suffix) = split_prefix(&names[c]);
266            let values: Vec<Option<String>> = features
267                .iter()
268                .map(|f| {
269                    if let Some(PropValue::Str(s)) = f.properties.get(c) {
270                        s.clone()
271                    } else {
272                        None
273                    }
274                })
275                .collect();
276            (suffix.to_string(), StagedStrings::from(values))
277        })
278        .collect();
279
280    // Set the suffix names on each StagedStrings (for the corpus-dedup step).
281    // The names aren't stored in StagedStrings.name here; they're passed as the
282    // tuple key to build_staged_shared_dict.
283    build_staged_shared_dict(prefix.to_string(), items_raw)
284        .expect("rebuild_shared_dict should always succeed for valid feature data")
285}
286
287// ── Helpers ───────────────────────────────────────────────────────────────────
288
289/// Charge `dec` for the heap bytes of owned `String` values inside `PropValue::Str`.
290fn charge_str_props(dec: &mut Decoder, props: &[PropValue]) -> MltResult<()> {
291    let str_bytes = props
292        .iter()
293        .filter_map(|p| {
294            if let PropValue::Str(Some(s)) = p {
295                Some(s.len())
296            } else {
297                None
298            }
299        })
300        .try_fold(0u32, |acc, n| {
301            acc.checked_add(u32::try_from(n).or_overflow()?)
302                .ok_or(MltError::IntegerOverflow)
303        })?;
304    if str_bytes > 0 {
305        dec.consume(str_bytes)?;
306    }
307    Ok(())
308}