//! Geometry column encoder (`mlt_core/encoder/geometry/encode.rs`).
use std::collections::HashMap;
use std::mem;

use geo_types::Coord;
use probabilistic_collections::SipHasherBuilder;
use probabilistic_collections::hyperloglog::HyperLogLog;

use super::model::VertexBufferType;
use crate::MltResult;
use crate::codecs::hilbert::hilbert_sort_key;
use crate::codecs::zigzag::encode_componentwise_delta_vec2s;
use crate::decoder::GeometryType::{LineString, Point, Polygon};
use crate::decoder::{
    ColumnType, DictionaryType, GeometryType, GeometryValues, LengthType, LogicalEncoding, Morton,
    OffsetType, PhysicalEncoding, StreamMeta, StreamType,
};
use crate::encoder::model::{CurveParams, StreamCtx};
use crate::encoder::{Codecs, Encoder, PhysicalCodecs, write_stream_payload};
use crate::utils::AsUsize as _;
20
21/// Compute `ZOrderCurve` parameters from the vertex value range.
22///
23/// Returns `(bits, shift)` matching Java's `SpaceFillingCurve`.
24/// Build a sorted unique Morton dictionary and per-vertex offset indices from a flat
25/// `[x0, y0, x1, y1, …]` vertex slice.
26///
27/// Returns `(sorted_unique_codes, per_vertex_offsets)`.
28#[hotpath::measure]
29fn build_morton_dict(vertices: &[i32], meta: Morton) -> MltResult<(Vec<u32>, Vec<u32>)> {
30    let codes: Vec<u32> = vertices
31        .chunks_exact(2)
32        .map(|c| meta.encode_morton(c[0], c[1]))
33        .collect::<Result<_, _>>()?;
34
35    let mut dict = codes.clone();
36    dict.sort_unstable();
37    dict.dedup();
38
39    #[expect(
40        clippy::cast_possible_truncation,
41        reason = "dict.len() <= u32::MAX (deduped u32 codes)"
42    )]
43    let code_to_idx: HashMap<u32, u32> = dict
44        .iter()
45        .enumerate()
46        .map(|(i, &c)| (c, i as u32))
47        .collect();
48    let offsets: Vec<u32> = codes.iter().map(|code| code_to_idx[code]).collect();
49
50    Ok((dict, offsets))
51}
52
53/// Build a Hilbert-curve-sorted unique vertex dictionary into caller-provided
54/// scratch.
55///
56/// On return, `dict_xy` holds the deduplicated `[x, y, …]` dictionary in
57/// Hilbert order and `offsets[i]` is the slot of input vertex `i`; `indexed`
58/// and `remap` are left as opaque scratch.
59///
60/// Dedup is keyed on the Hilbert curve index. Inside the `params.bits` grid
61/// the index ↔ `(x, y)` mapping is bijective, so dedup-by-index is equivalent
62/// to dedup-by-coordinate without the cost of hashing pairs.
63#[hotpath::measure]
64fn build_hilbert_dict(
65    vertices: &[i32],
66    params: CurveParams,
67    offsets: &mut Vec<u32>,
68    indexed: &mut Vec<u64>,
69    dict_xy: &mut Vec<i32>,
70    remap: &mut HashMap<u32, u32>,
71) {
72    offsets.clear();
73    indexed.clear();
74    dict_xy.clear();
75    remap.clear();
76
77    let coord_count = vertices.len() / 2;
78    if coord_count == 0 {
79        return;
80    }
81    offsets.reserve(coord_count);
82    indexed.reserve(coord_count);
83    dict_xy.reserve(coord_count * 2);
84    remap.reserve(coord_count);
85
86    for (i, c) in vertices.chunks_exact(2).enumerate() {
87        let k = hilbert_sort_key(Coord { x: c[0], y: c[1] }, params);
88        offsets.push(k);
89        // Key in the high 32 bits so a single u64 sort orders by Hilbert
90        // index while preserving the original position for tie-breaking.
91        let packed = (u64::from(k) << 32) | (i as u64);
92        indexed.push(packed);
93    }
94    indexed.sort_unstable();
95
96    let mut last_key: Option<u32> = None;
97    for &packed in &*indexed {
98        let key = (packed >> 32) as u32;
99        let src_idx = (packed & 0xFFFF_FFFF) as usize;
100        if last_key != Some(key) {
101            #[expect(
102                clippy::cast_possible_truncation,
103                reason = "dict.len() <= coord_count <= u32::MAX"
104            )]
105            let slot = (dict_xy.len() / 2) as u32;
106            dict_xy.push(vertices[src_idx * 2]);
107            dict_xy.push(vertices[src_idx * 2 + 1]);
108            remap.insert(key, slot);
109            last_key = Some(key);
110        }
111    }
112
113    for k in offsets.iter_mut() {
114        *k = remap[k];
115    }
116}
117
/// Push consecutive offset-differences from `offsets` onto `lengths`.
///
/// Expects a slice of `n + 1` elements and produces `n` lengths, one per
/// consecutive pair: `offsets[i + 1] - offsets[i]`. Returns the number of
/// lengths pushed.
///
/// An empty slice yields no lengths and returns `0` (previously this
/// underflow-panicked on `len() - 1`); a single-element slice also returns `0`.
#[inline]
fn extend_offsets(lengths: &mut Vec<u32>, offsets: &[u32]) -> usize {
    lengths.extend(offsets.windows(2).map(|w| w[1] - w[0]));
    offsets.len().saturating_sub(1)
}
127
128/// Convert geometry offsets to length stream for encoding.
129/// This is the inverse of `decode_root_length_stream`.
130///
131/// The offset array can be either:
132/// - Sparse: entries only for geometries that need them (types > `buffer_id`), N+1 entries for N matching geoms
133/// - Dense (normalized): N+1 entries for N geometry types, indexed by geometry position
134///
135/// If dense `(len == geom_types.len() + 1)`, use geometry index directly.
136/// If sparse, use sequential indexing for matching geometry types.
137fn encode_root_length_stream(
138    geom_types: &[GeometryType],
139    geom_offsets: &[u32],
140    buffer_id: GeometryType,
141) -> Vec<u32> {
142    if geom_offsets.len() == geom_types.len() + 1 {
143        // Dense: zip by position, then filter out non-contributing types.
144        geom_types
145            .iter()
146            .zip(geom_offsets.windows(2))
147            .filter(|&(&t, _)| t > buffer_id)
148            .map(|(_, w)| w[1] - w[0])
149            .collect()
150    } else {
151        // Sparse: filter types first, then zip with consecutive offset pairs.
152        geom_types
153            .iter()
154            .filter(|&&t| t > buffer_id)
155            .zip(geom_offsets.windows(2))
156            .map(|(_, w)| w[1] - w[0])
157            .collect()
158    }
159}
160
161/// Convert part offsets to length stream for level 1 encoding.
162fn encode_level1_length_stream(
163    geom_types: &[GeometryType],
164    geom_offsets: &[u32],
165    part_offsets: &[u32],
166    is_line_string_present: bool,
167) -> Vec<u32> {
168    let mut lengths = Vec::new();
169    let mut part_idx = 0;
170
171    for (i, &geom_type) in geom_types.iter().enumerate() {
172        if geom_type.is_polygon() || (is_line_string_present && geom_type.is_linestring()) {
173            let n = (geom_offsets[i + 1] - geom_offsets[i]).as_usize();
174            part_idx += extend_offsets(&mut lengths, &part_offsets[part_idx..=part_idx + n]);
175        }
176        // Note: Point/MultiPoint don't have entries in the sparse part_offsets used
177        // at this call site, so part_idx must not advance for non-length types here.
178    }
179
180    lengths
181}
182
183/// Compute ring vertex-count lengths for the no-geometry-offsets + has-ring-offsets case.
184///
185/// In this branch `part_offsets` is a **dense** N+1 array (one slot per geometry,
186/// including Points) and `ring_offsets` holds the vertex offsets for every slot.
187/// Using the geometry index directly as the ring-slot index avoids the
188/// running-counter misalignment that `encode_level1_length_stream` would produce
189/// when non-length types (Points) occupy slots that a sparse counter skips.
190fn encode_ring_lengths_for_mixed(
191    geom_types: &[GeometryType],
192    part_offsets: &[u32],
193    ring_offsets: &[u32],
194    has_line_string: bool,
195) -> Vec<u32> {
196    let mut lengths = Vec::new();
197    for (i, &geom_type) in geom_types.iter().enumerate() {
198        if geom_type.is_polygon() || (has_line_string && geom_type.is_linestring()) {
199            let s = part_offsets[i].as_usize();
200            let e = part_offsets[i + 1].as_usize();
201            extend_offsets(&mut lengths, &ring_offsets[s..=e]);
202        }
203    }
204    lengths
205}
206
207/// Convert ring offsets to length stream for level 2 encoding.
208/// This is the inverse of `decode_level2_length_stream`.
209///
210/// The `geom_offsets` array is expected to be an N+1 element array for N geometries.
211/// The `part_offsets` array tracks ring counts cumulatively.
212fn encode_level2_length_stream(
213    geom_types: &[GeometryType],
214    geom_offsets: &[u32],
215    part_offsets: &[u32],
216    ring_offsets: &[u32],
217) -> Vec<u32> {
218    let mut lengths = Vec::new();
219    let mut part_idx = 0;
220    let mut ring_idx = 0;
221
222    for (i, &geom_type) in geom_types.iter().enumerate() {
223        let count = (geom_offsets[i + 1] - geom_offsets[i]).as_usize();
224
225        // Only Polygon and MultiPolygon have ring data in level 2
226        // LineStrings with Polygon present add their vertex counts directly to ring_offsets,
227        // but they don't have parts (ring count per linestring is always 1 implicitly)
228        if geom_type.is_polygon() {
229            // Polygon/MultiPolygon: iterate through sub-polygons, each has parts (ring counts)
230            for _ in 0..count {
231                let n = (part_offsets[part_idx + 1] - part_offsets[part_idx]).as_usize();
232                ring_idx += extend_offsets(&mut lengths, &ring_offsets[ring_idx..=ring_idx + n]);
233                part_idx += 1;
234            }
235        } else if geom_type.is_linestring() {
236            // LineStrings contribute to ring_offsets directly (vertex counts)
237            ring_idx += extend_offsets(&mut lengths, &ring_offsets[ring_idx..=ring_idx + count]);
238        }
239        // Note: Point/MultiPoint don't contribute to ring_offsets
240    }
241
242    lengths
243}
244
245/// Convert part offsets without ring buffer to length stream.
246///
247/// This path is reached only when `ring_offsets` is absent, which means no Polygon/MultiPolygon
248/// types are present (they always create `ring_offsets`).  Only LineString/MultiLineString
249/// contribute vertex-count lengths here; Point/MultiPoint use an implicit count of 1 in the
250/// decoder and produce no entry in this stream.
251fn encode_level1_without_ring_buffer_length_stream(
252    geom_types: &[GeometryType],
253    geom_offsets: &[u32],
254    part_offsets: &[u32],
255) -> Vec<u32> {
256    let mut lengths = Vec::new();
257    let mut part_idx = 0;
258
259    for (i, &geom_type) in geom_types.iter().enumerate() {
260        if geom_type.is_linestring() {
261            let n = (geom_offsets[i + 1] - geom_offsets[i]).as_usize();
262            part_idx += extend_offsets(&mut lengths, &part_offsets[part_idx..=part_idx + n]);
263        }
264        // Point/MultiPoint don't contribute to part_offsets; part_idx must not advance.
265    }
266
267    lengths
268}
269
270/// Normalize `geom_offsets` for mixed geometry types.
271fn normalize_geometry_offsets(vector_types: &[GeometryType], geom_offsets: &[u32]) -> Vec<u32> {
272    let mut normalized = Vec::with_capacity(vector_types.len() + 1);
273    let mut offset = 0_u32;
274    let mut sparse_idx = 0_usize; // Index into sparse geom_offsets
275
276    for &geom_type in vector_types {
277        normalized.push(offset);
278
279        if geom_type.is_multi() {
280            // Multi* types get their count from the sparse array
281            if sparse_idx + 1 < geom_offsets.len() {
282                let start = geom_offsets[sparse_idx];
283                let end = geom_offsets[sparse_idx + 1];
284                offset += end - start;
285                sparse_idx += 1;
286            }
287        } else {
288            // Non-Multi types have implicit count of 1
289            offset += 1;
290        }
291    }
292
293    normalized.push(offset);
294    normalized
295}
296
297/// Normalize `part_offsets` for ring-based indexing (Polygon mixed with `Point`/`LineString`).
298///
299/// Called only when `geom_offsets` is absent (no Multi\* types) and `ring_offsets` is
300/// present.  In this context `part_offsets` is a compact polygon-only array; this function
301/// expands it to a dense per-geometry array so that `encode_ring_lengths_for_mixed` can index
302/// directly by geometry position.
303///
304/// Each slot in the output holds the first index into `ring_offsets` for that geometry:
305/// - `Point`: no contribution — slot range is empty (`ring_idx` unchanged).
306/// - `LineString`: contributes 1 slot (vertex count) — slot range is 1.
307/// - `Polygon`: contributes `ring_count` slots — slot range equals its ring count.
308fn normalize_part_offsets_for_rings(
309    vector_types: &[GeometryType],
310    part_offsets: &[u32],
311    ring_offsets: &[u32],
312) -> Vec<u32> {
313    let mut normalized = Vec::with_capacity(vector_types.len() + 1);
314    let mut ring_idx = 0_u32;
315    let mut part_idx = 0_usize;
316
317    for &geom_type in vector_types {
318        normalized.push(ring_idx);
319
320        if geom_type == Point {
321            // Point has no vertex-count slot in ring_offsets.
322        } else if geom_type.is_linestring() {
323            // Each LineString occupies exactly one slot in ring_offsets.
324            ring_idx += 1;
325        } else if geom_type.is_polygon() && part_idx + 1 < part_offsets.len() {
326            // Polygon occupies ring_count slots (one vertex-count per ring).
327            let ring_count = part_offsets[part_idx + 1] - part_offsets[part_idx];
328            ring_idx += ring_count;
329            part_idx += 1;
330        }
331        // No Multi* types can appear here (they always produce geom_offsets).
332    }
333
334    // ring_idx must equal ring_offsets.len() - 1 for well-formed data.
335    debug_assert_eq!(
336        ring_idx as usize,
337        ring_offsets.len().saturating_sub(1),
338        "ring index mismatch after normalization"
339    );
340    normalized.push(ring_idx);
341    normalized
342}
343
344/// Whether to race dictionary-based vertex layouts (Hilbert, Morton) against
345/// the plain Vec2 layout for this geometry column.
346///
347/// Profiling showed unconditional racing is ~2× slower overall: most layers
348/// have high vertex uniqueness, where the dict layouts cannot win and the
349/// extra sort + `HashMap` build is wasted. Gate on Morton fitting in 16 bits
350/// per axis (required by the spec) and on a `HyperLogLog`-estimated
351/// uniqueness ratio below the threshold.
352#[hotpath::measure]
353fn dict_may_be_beneficial(vertices: &[i32], enc: &Encoder) -> bool {
354    const MAXIMUM_UNIQUENESS_THRESHOLD_FOR_DICT: f64 = 0.66;
355
356    let coord_count = vertices.len() / 2;
357    if coord_count == 0 || enc.morton_cache.is_none() {
358        return false;
359    }
360
361    let mut hll = HyperLogLog::<Coord<i32>>::with_hasher(0.03, SipHasherBuilder::from_seed(0, 0));
362    for c in vertices.chunks_exact(2) {
363        hll.insert(&Coord::<i32> { x: c[0], y: c[1] });
364    }
365    #[expect(clippy::cast_precision_loss)]
366    let estimated_unique = hll.len().clamp(0.0, coord_count as f64);
367    #[expect(clippy::cast_precision_loss)]
368    let uniqueness_ratio = estimated_unique / coord_count as f64;
369    uniqueness_ratio < MAXIMUM_UNIQUENESS_THRESHOLD_FOR_DICT
370}
371
372/// Pre-populated by [`StagedLayer::encode_into`](crate::encoder::StagedLayer::encode_into);
373/// callers must have gated on [`dict_may_be_beneficial`] which rejects layers
374/// whose extent does not fit Morton.
375fn get_morton(enc: &Encoder) -> Morton {
376    enc.morton_cache.expect(
377        "morton_cache populated by StagedLayer::encode_into; gated by dict_may_be_beneficial",
378    )
379}
380
381/// Pre-populated by [`StagedLayer::encode_into`](crate::encoder::StagedLayer::encode_into).
382fn get_hilbert_params(enc: &Encoder) -> CurveParams {
383    enc.hilbert_cache
384        .expect("hilbert_cache populated by StagedLayer::encode_into")
385}
386
387/// Encode the plain Vec2 vertex layout: componentwise-delta over the raw
388/// `[x0, y0, x1, y1, …]` slice.
389fn encode_vec2_vertex_stream(
390    vertices: &[i32],
391    enc: &mut Encoder,
392    codecs: &mut Codecs,
393) -> MltResult<u8> {
394    let delta = encode_componentwise_delta_vec2s(vertices, &mut codecs.logical.u32_tmp);
395    let ctx = StreamCtx::geom(StreamType::Data(DictionaryType::Vertex), "vertex");
396    let logical = LogicalEncoding::ComponentwiseDelta;
397    write_geo_precomputed_stream(delta, ctx, logical, enc, &mut codecs.physical)
398}
399
400/// Encode a Morton-keyed vertex dictionary: per-vertex offsets stream
401/// followed by a delta-encoded Morton-code dictionary.
402fn encode_morton_vertex_streams(
403    vertices: &[i32],
404    enc: &mut Encoder,
405    codecs: &mut Codecs,
406) -> MltResult<u8> {
407    let morton = get_morton(enc);
408    let (dict, offsets) = build_morton_dict(vertices, morton)?;
409    let mut n: u8 = 0;
410
411    let ctx = StreamCtx::geom(StreamType::Offset(OffsetType::Vertex), "vertex_offsets");
412    n += write_geo_u32_stream(&offsets, ctx, enc, codecs)?;
413
414    let delta = encode_morton_deltas(&dict, &mut codecs.logical.u32_tmp);
415    let ctx = StreamCtx::geom(StreamType::Data(DictionaryType::Morton), "vertex");
416    let logical = LogicalEncoding::MortonDelta(morton);
417    n += write_geo_precomputed_stream(delta, ctx, logical, enc, &mut codecs.physical)?;
418    Ok(n)
419}
420
/// Encode a Hilbert-keyed vertex dictionary: per-vertex offsets stream
/// followed by a componentwise-delta-encoded `[x, y, …]` dictionary in
/// Hilbert order.
///
/// Returns the number of streams written (up to 2: offsets + dictionary).
fn encode_hilbert_vertex_streams(
    vertices: &[i32],
    enc: &mut Encoder,
    codecs: &mut Codecs,
) -> MltResult<u8> {
    let params = get_hilbert_params(enc);
    let mut n: u8 = 0;

    // Take scratch ownership locally: `write_geo_*_stream` needs `&mut Codecs`,
    // which would otherwise conflict with our `&[..]` views into these slots.
    let mut offsets = mem::take(&mut codecs.logical.hilbert_offsets);
    let mut indexed = mem::take(&mut codecs.logical.hilbert_indexed);
    let mut dict_xy = mem::take(&mut codecs.logical.hilbert_dict_xy);
    let mut remap = mem::take(&mut codecs.logical.hilbert_remap);

    build_hilbert_dict(
        vertices,
        params,
        &mut offsets,
        &mut indexed,
        &mut dict_xy,
        &mut remap,
    );
    // Done with these — restore so the physical-encoding race below can use
    // them via the codec. `offsets` and `dict_xy` stay out until written.
    codecs.logical.hilbert_indexed = indexed;
    codecs.logical.hilbert_remap = remap;

    // Stream 1: per-vertex dictionary slot indices.
    let ctx = StreamCtx::geom(StreamType::Offset(OffsetType::Vertex), "vertex_offsets");
    n += write_geo_u32_stream(&offsets, ctx, enc, codecs)?;

    // Stream 2: the dictionary itself, componentwise-delta encoded.
    // Reuse `offsets` as the delta output rather than allocating another Vec;
    // also keeps `codecs.logical.u32_values` free for the inner race.
    encode_componentwise_delta_vec2s(&dict_xy, &mut offsets);
    let ctx = StreamCtx::geom(StreamType::Data(DictionaryType::Vertex), "vertex");
    let logical = LogicalEncoding::ComponentwiseDelta;
    n += write_geo_precomputed_stream(&offsets, ctx, logical, enc, &mut codecs.physical)?;

    // Hand the remaining scratch back for reuse on the next column.
    codecs.logical.hilbert_offsets = offsets;
    codecs.logical.hilbert_dict_xy = dict_xy;
    Ok(n)
}
466
467/// Write a geometry `u32` stream: [`Encoder::override_int_enc`] when explicit mode is active,
468/// otherwise try all pruned candidates and keep the shortest.
469///
470/// Returns `1` if the stream was written, `0` if it was skipped.  Empty streams are skipped
471/// unless [`Encoder::force_stream`] returns `true` for this stream's [`StreamCtx`].
472fn write_geo_u32_stream(
473    data: &[u32],
474    ctx: StreamCtx,
475    enc: &mut Encoder,
476    codecs: &mut Codecs,
477) -> MltResult<u8> {
478    Ok(if data.is_empty() && !enc.force_stream(&ctx) {
479        0
480    } else {
481        codecs.write_int_stream(data, &ctx, enc)?;
482        1
483    })
484}
485
486/// Like [`write_geo_u32_stream`] but for pre-logically-encoded data: competes
487/// only the physical encoders instead of applying a logical transform.
488///
489/// Returns `1` if the stream was written, `0` if skipped (empty + no force).
490fn write_geo_precomputed_stream(
491    data: &[u32],
492    ctx: StreamCtx,
493    logical: LogicalEncoding,
494    enc: &mut Encoder,
495    physical: &mut PhysicalCodecs,
496) -> MltResult<u8> {
497    use PhysicalEncoding as PE;
498
499    Ok(if data.is_empty() && !enc.force_stream(&ctx) {
500        0
501    } else {
502        if let Some(int_enc) = enc.override_int_enc(&ctx) {
503            physical.write_encoded_as::<[u32]>(&ctx, enc, logical, data, int_enc.physical)?;
504        } else if data.is_empty() {
505            let meta = StreamMeta::new2(ctx.stream_type, logical, PE::None, 0)?;
506            write_stream_payload(&mut enc.data, meta, false, &[])?;
507        } else {
508            let mut alt = enc.try_alternatives();
509            alt.with(|enc| {
510                let vals = physical.fastpfor(data)?;
511                let meta = StreamMeta::new2(ctx.stream_type, logical, PE::FastPFor256, data.len())?;
512                write_stream_payload(&mut enc.data, meta, false, vals)
513            })?;
514            alt.with(|enc| {
515                let vals = physical.varint(data);
516                let meta = StreamMeta::new2(ctx.stream_type, logical, PE::VarInt, data.len())?;
517                write_stream_payload(&mut enc.data, meta, false, vals)
518            })?;
519        }
520        1
521    })
522}
523
impl GeometryValues {
    /// Write the geometry column to `enc`.
    ///
    /// Stream order: the meta (geometry-type) stream, the applicable topology
    /// length streams (which subset depends on which offset buffers are
    /// present), tessellation streams (triangles + index buffer), then the
    /// vertex data — racing Vec2/Hilbert/Morton layouts when a dictionary
    /// looks beneficial. A single stream-count byte is reserved up front and
    /// patched once the final count `n` is known.
    #[hotpath::measure]
    pub fn write_to(self, enc: &mut Encoder, codecs: &mut Codecs) -> MltResult<()> {
        let Self {
            vector_types,
            geometry_offsets,
            part_offsets,
            ring_offsets,
            index_buffer,
            triangles,
            vertices,
        } = self;

        // Flatten every Option<Vec> → Vec  (empty == not present).
        // triangles: None means no tessellation; Some([]) can't occur in practice (each
        // push_geom appends a count), so empty == absent is safe here too.
        // vertices: None means no coordinate data (e.g. empty layer).
        let geom_offsets = geometry_offsets.unwrap_or_default();
        let part_offsets = part_offsets.unwrap_or_default();
        let ring_offsets = ring_offsets.unwrap_or_default();
        let index_buffer = index_buffer.unwrap_or_default();
        let triangles = triangles.unwrap_or_default();
        let vertices = vertices.unwrap_or_default();

        // Direct callers (tests, custom drivers) skip `StagedLayer::encode_into`
        // and arrive with empty caches; populate from `vertices` so the
        // dictionary builders can rely on them unconditionally.
        if enc.hilbert_cache.is_none() {
            enc.hilbert_cache = Some(CurveParams::from_vertices(&vertices));
        }
        if enc.morton_cache.is_none() {
            let p = enc.hilbert_cache.expect("populated above");
            enc.morton_cache = Morton::new(p.bits, p.shift).ok();
        }

        // One geometry type per feature, widened to u32 for the int-stream codec.
        let meta: Vec<u32> = vector_types.iter().map(|t| *t as u32).collect();

        let part_offsets = if geom_offsets.is_empty()
            && !ring_offsets.is_empty()
            && !part_offsets.is_empty()
            && part_offsets.len() != vector_types.len() + 1
        {
            // Normalize part_offsets when there are no geometry offsets but ring offsets exist.
            normalize_part_offsets_for_rings(&vector_types, &part_offsets, &ring_offsets)
        } else {
            part_offsets
        };

        // Write column type to meta; reserve exactly 1 byte for stream count
        // (geometry never exceeds ~8 streams, always fits in a single varint byte).
        enc.write_column_type(ColumnType::Geometry)?;
        let stream_count_pos = enc.data.len();
        enc.data.push(0); // placeholder — patched below
        let mut n: u8 = 0;

        // Meta stream — always written, even for a zero-feature layer.
        let ctx = StreamCtx::geom(StreamType::Length(LengthType::VarBinary), "meta");
        codecs.write_int_stream(&meta, &ctx, enc)?;
        n += 1;

        // Topology: compute each length stream and write it immediately.
        if !geom_offsets.is_empty() {
            // Densify geom_offsets if it arrived in the sparse (Multi*-only) layout.
            let geom_offsets = if geom_offsets.len() == vector_types.len() + 1 {
                geom_offsets
            } else {
                normalize_geometry_offsets(&vector_types, &geom_offsets)
            };
            let data = encode_root_length_stream(&vector_types, &geom_offsets, Polygon);
            let ctx = StreamCtx::geom(StreamType::Length(LengthType::Geometries), "geometries");
            n += write_geo_u32_stream(&data, ctx, enc, codecs)?;

            // part_offsets is intentionally kept sparse here (polygon-only cumulative
            // ring counts). encode_level1/2_length_stream navigate it with a running
            // part_idx counter that advances only for Polygon/LineString types, which
            // matches the sparse layout. Densifying via normalize_part_offsets_for_rings
            // would insert Point slots and corrupt the counter arithmetic.
            if !part_offsets.is_empty() {
                if ring_offsets.is_empty() {
                    // geom → parts only (no rings).
                    let data = encode_level1_without_ring_buffer_length_stream(
                        &vector_types,
                        &geom_offsets,
                        &part_offsets,
                    );
                    let ctx = StreamCtx::geom(StreamType::Length(LengthType::Parts), "no_rings");
                    n += write_geo_u32_stream(&data, ctx, enc, codecs)?;
                } else {
                    // Full topology: geom → parts → rings.
                    // LineStrings contribute to rings here, not to parts.
                    let data = encode_level1_length_stream(
                        &vector_types,
                        &geom_offsets,
                        &part_offsets,
                        false,
                    );
                    let ctx = StreamCtx::geom(StreamType::Length(LengthType::Parts), "rings");
                    n += write_geo_u32_stream(&data, ctx, enc, codecs)?;

                    let data = encode_level2_length_stream(
                        &vector_types,
                        &geom_offsets,
                        &part_offsets,
                        &ring_offsets,
                    );
                    let ctx = StreamCtx::geom(StreamType::Length(LengthType::Rings), "rings2");
                    n += write_geo_u32_stream(&data, ctx, enc, codecs)?;
                }
            }
        } else if !part_offsets.is_empty() {
            if ring_offsets.is_empty() {
                let data = encode_root_length_stream(&vector_types, &part_offsets, Point);
                let ctx = StreamCtx::geom(StreamType::Length(LengthType::Parts), "no_rings");
                n += write_geo_u32_stream(&data, ctx, enc, codecs)?;
            } else {
                // No Multi* types; parts → rings (Polygon / mixed Point+Polygon).
                // Java writes an empty GEOMETRIES stream here for tessellated polygons; only do
                // so when explicitly forced (e.g. to preserve byte-for-byte Java compatibility).
                let ctx = StreamCtx::geom(StreamType::Length(LengthType::Geometries), "geometries");
                n += write_geo_u32_stream(&[], ctx, enc, codecs)?;

                let data = encode_root_length_stream(&vector_types, &part_offsets, LineString);
                let ctx = StreamCtx::geom(StreamType::Length(LengthType::Parts), "parts");
                n += write_geo_u32_stream(&data, ctx, enc, codecs)?;

                // part_offs is a dense N+1 array (one slot per geometry incl. Points);
                // ring_offs stores vertex offsets per slot.  The dense-aware helper skips
                // Point slots by index rather than a running counter.
                let has_line_string = vector_types
                    .iter()
                    .copied()
                    .any(GeometryType::is_linestring);
                let data = encode_ring_lengths_for_mixed(
                    &vector_types,
                    &part_offsets,
                    &ring_offsets,
                    has_line_string,
                );
                let ctx = StreamCtx::geom(StreamType::Length(LengthType::Rings), "parts_ring");
                n += write_geo_u32_stream(&data, ctx, enc, codecs)?;
            }
        }

        // Tessellation streams — skipped automatically when empty (unless forced).
        let ctx = StreamCtx::geom(StreamType::Length(LengthType::Triangles), "triangles");
        n += write_geo_u32_stream(&triangles, ctx, enc, codecs)?;
        let ctx = StreamCtx::geom(StreamType::Offset(OffsetType::Index), "triangles_indexes");
        n += write_geo_u32_stream(&index_buffer, ctx, enc, codecs)?;

        if let Some(forced) = enc.override_vertex_buffer_type() {
            // Explicit mode: encode exactly the requested layout, no racing.
            n += match forced {
                VertexBufferType::Vec2 => encode_vec2_vertex_stream(&vertices, enc, codecs)?,
                VertexBufferType::Morton => encode_morton_vertex_streams(&vertices, enc, codecs)?,
                VertexBufferType::Hilbert => encode_hilbert_vertex_streams(&vertices, enc, codecs)?,
            };
        } else if dict_may_be_beneficial(&vertices, enc) {
            // Morton fits (the gate above ensures it), so race all three.
            // `winner_size`/`winner_stream_cnt` track only the stream count of the
            // smallest candidate; the alternatives mechanism keeps the winning bytes.
            let mut winner_size: usize = usize::MAX;
            let mut winner_stream_cnt: u8 = 0;
            let mut alt = enc.try_alternatives();
            alt.with(|e| {
                let ds = e.data.len();
                let ms = e.meta.len();
                winner_stream_cnt = encode_vec2_vertex_stream(&vertices, e, codecs)?;
                winner_size = (e.data.len() - ds) + (e.meta.len() - ms);
                Ok(())
            })?;
            alt.with(|e| {
                let ds = e.data.len();
                let ms = e.meta.len();
                let cnt = encode_hilbert_vertex_streams(&vertices, e, codecs)?;
                let size = (e.data.len() - ds) + (e.meta.len() - ms);
                if size < winner_size {
                    winner_stream_cnt = cnt;
                    winner_size = size;
                }
                Ok(())
            })?;
            alt.with(|e| {
                let ds = e.data.len();
                let ms = e.meta.len();
                let cnt = encode_morton_vertex_streams(&vertices, e, codecs)?;
                let size = (e.data.len() - ds) + (e.meta.len() - ms);
                if size < winner_size {
                    // Last candidate — no need to update winner_size after this.
                    winner_stream_cnt = cnt;
                }
                Ok(())
            })?;
            drop(alt);
            n += winner_stream_cnt;
        } else {
            n += encode_vec2_vertex_stream(&vertices, enc, codecs)?;
        }

        // Patch the reserved stream-count byte.
        debug_assert!(n <= 127, "geometry stream count must fit in one byte");
        enc.data[stream_count_pos] = n;
        Ok(())
    }
}
723
/// Delta-encode sorted Morton `codes` into `buffer`: the first code verbatim,
/// then each successive difference. Clears `buffer` first and returns it.
fn encode_morton_deltas<'a>(codes: &[u32], buffer: &'a mut Vec<u32>) -> &'a mut Vec<u32> {
    buffer.clear();
    if !codes.is_empty() {
        buffer.reserve(codes.len());
        buffer.push(codes[0]);
        for pair in codes.windows(2) {
            buffer.push(pair[1] - pair[0]);
        }
    }
    buffer
}
732
#[cfg(test)]
mod tests {
    use super::*;

    /// Dedup, sort order, and index validity of the Morton dictionary builder.
    #[test]
    fn test_build_morton_dict() {
        let meta = Morton { bits: 4, shift: 0 };
        // vertices: [x0,y0, x1,y1, x2,y2, x3,y3] — repeat (1,2) to test dedup
        let vertices = [1, 2, 3, 4, 1, 2, 0, 0];
        let (dict, offsets) = build_morton_dict(&vertices, meta).unwrap();

        // Strictly increasing ⇒ both sorted and deduplicated.
        assert!(
            dict.windows(2).all(|w| w[0] < w[1]),
            "dict not sorted/unique"
        );
        assert_eq!(offsets.len(), 4, "offsets length == number of vertex pairs");
        assert_eq!(offsets[0], offsets[2], "duplicate (1,2) should share index");
        // Every offset must point inside the dictionary.
        assert!(offsets.iter().all(|&o| (o as usize) < dict.len()));
    }

    /// Root length stream: non-Multi types at `buffer_id` emit nothing;
    /// Multi* types emit their sub-geometry counts.
    #[test]
    fn test_encode_root_length_stream() {
        // Single Polygon geometry (no Multi)
        let types = vec![Polygon];
        let offsets = vec![0, 1]; // One polygon

        let lengths = encode_root_length_stream(&types, &offsets, Polygon);
        // Polygon == buffer_id, so no length encoded
        assert!(lengths.is_empty());

        // MultiPolygon needs length encoded
        let types = vec![GeometryType::MultiPolygon];
        let offsets = vec![0, 2]; // MultiPolygon with 2 polygons

        let lengths = encode_root_length_stream(&types, &offsets, Polygon);
        assert_eq!(lengths, vec![2]);
    }
}