1use std::collections::HashMap;
2use std::mem;
3
4use geo_types::Coord;
5use probabilistic_collections::SipHasherBuilder;
6use probabilistic_collections::hyperloglog::HyperLogLog;
7
8use super::model::VertexBufferType;
9use crate::MltResult;
10use crate::codecs::hilbert::hilbert_sort_key;
11use crate::codecs::zigzag::encode_componentwise_delta_vec2s;
12use crate::decoder::GeometryType::{LineString, Point, Polygon};
13use crate::decoder::{
14 ColumnType, DictionaryType, GeometryType, GeometryValues, LengthType, LogicalEncoding, Morton,
15 OffsetType, PhysicalEncoding, StreamMeta, StreamType,
16};
17use crate::encoder::model::{CurveParams, StreamCtx};
18use crate::encoder::{Codecs, Encoder, PhysicalCodecs, write_stream_payload};
19use crate::utils::AsUsize as _;
20
21#[hotpath::measure]
29fn build_morton_dict(vertices: &[i32], meta: Morton) -> MltResult<(Vec<u32>, Vec<u32>)> {
30 let codes: Vec<u32> = vertices
31 .chunks_exact(2)
32 .map(|c| meta.encode_morton(c[0], c[1]))
33 .collect::<Result<_, _>>()?;
34
35 let mut dict = codes.clone();
36 dict.sort_unstable();
37 dict.dedup();
38
39 #[expect(
40 clippy::cast_possible_truncation,
41 reason = "dict.len() <= u32::MAX (deduped u32 codes)"
42 )]
43 let code_to_idx: HashMap<u32, u32> = dict
44 .iter()
45 .enumerate()
46 .map(|(i, &c)| (c, i as u32))
47 .collect();
48 let offsets: Vec<u32> = codes.iter().map(|code| code_to_idx[code]).collect();
49
50 Ok((dict, offsets))
51}
52
/// Builds a Hilbert-ordered vertex dictionary into caller-provided scratch
/// buffers (all cleared first, so they may be reused across calls).
///
/// On return:
/// * `offsets` — one entry per input vertex pair: the index of that vertex in
///   the dictionary (computed via its Hilbert sort key).
/// * `indexed` — scratch: `(key << 32) | source_index`, sorted.
/// * `dict_xy` — unique vertices as interleaved x/y pairs, in Hilbert key order.
/// * `remap` — Hilbert sort key -> dictionary slot.
///
/// NOTE(review): vertices sharing the same Hilbert key are collapsed to the
/// first occurrence — assumes distinct coordinates map to distinct keys;
/// confirm against `hilbert_sort_key`.
#[hotpath::measure]
fn build_hilbert_dict(
    vertices: &[i32],
    params: CurveParams,
    offsets: &mut Vec<u32>,
    indexed: &mut Vec<u64>,
    dict_xy: &mut Vec<i32>,
    remap: &mut HashMap<u32, u32>,
) {
    offsets.clear();
    indexed.clear();
    dict_xy.clear();
    remap.clear();

    let coord_count = vertices.len() / 2;
    if coord_count == 0 {
        return;
    }
    offsets.reserve(coord_count);
    indexed.reserve(coord_count);
    dict_xy.reserve(coord_count * 2);
    remap.reserve(coord_count);

    for (i, c) in vertices.chunks_exact(2).enumerate() {
        let k = hilbert_sort_key(Coord { x: c[0], y: c[1] }, params);
        // `offsets` temporarily holds the raw sort key; it is rewritten to a
        // dictionary slot in the final loop below.
        offsets.push(k);
        // Pack key (high 32 bits) and source index (low 32 bits) so a single
        // sort orders vertices by key while remembering where they came from.
        let packed = (u64::from(k) << 32) | (i as u64);
        indexed.push(packed);
    }
    indexed.sort_unstable();

    // `indexed` is sorted by key first, so equal keys are adjacent: keep the
    // first vertex for each distinct key and record its dictionary slot.
    let mut last_key: Option<u32> = None;
    for &packed in &*indexed {
        let key = (packed >> 32) as u32;
        let src_idx = (packed & 0xFFFF_FFFF) as usize;
        if last_key != Some(key) {
            #[expect(
                clippy::cast_possible_truncation,
                reason = "dict.len() <= coord_count <= u32::MAX"
            )]
            let slot = (dict_xy.len() / 2) as u32;
            dict_xy.push(vertices[src_idx * 2]);
            dict_xy.push(vertices[src_idx * 2 + 1]);
            remap.insert(key, slot);
            last_key = Some(key);
        }
    }

    // Rewrite the per-vertex entries from raw keys to dictionary slots.
    for k in offsets.iter_mut() {
        *k = remap[k];
    }
}
117
/// Appends the deltas between consecutive `offsets` entries to `lengths` and
/// returns how many lengths were appended.
///
/// An empty `offsets` slice appends nothing and returns 0. (Previously the
/// return was `offsets.len() - 1`, which underflows on an empty slice —
/// panicking in debug builds and wrapping to `usize::MAX` in release builds.)
#[inline]
fn extend_offsets(lengths: &mut Vec<u32>, offsets: &[u32]) -> usize {
    lengths.extend(offsets.windows(2).map(|w| w[1] - w[0]));
    // `saturating_sub` guards the empty-slice case; for non-empty slices this
    // equals the number of windows pushed above.
    offsets.len().saturating_sub(1)
}
127
128fn encode_root_length_stream(
138 geom_types: &[GeometryType],
139 geom_offsets: &[u32],
140 buffer_id: GeometryType,
141) -> Vec<u32> {
142 if geom_offsets.len() == geom_types.len() + 1 {
143 geom_types
145 .iter()
146 .zip(geom_offsets.windows(2))
147 .filter(|&(&t, _)| t > buffer_id)
148 .map(|(_, w)| w[1] - w[0])
149 .collect()
150 } else {
151 geom_types
153 .iter()
154 .filter(|&&t| t > buffer_id)
155 .zip(geom_offsets.windows(2))
156 .map(|(_, w)| w[1] - w[0])
157 .collect()
158 }
159}
160
161fn encode_level1_length_stream(
163 geom_types: &[GeometryType],
164 geom_offsets: &[u32],
165 part_offsets: &[u32],
166 is_line_string_present: bool,
167) -> Vec<u32> {
168 let mut lengths = Vec::new();
169 let mut part_idx = 0;
170
171 for (i, &geom_type) in geom_types.iter().enumerate() {
172 if geom_type.is_polygon() || (is_line_string_present && geom_type.is_linestring()) {
173 let n = (geom_offsets[i + 1] - geom_offsets[i]).as_usize();
174 part_idx += extend_offsets(&mut lengths, &part_offsets[part_idx..=part_idx + n]);
175 }
176 }
179
180 lengths
181}
182
183fn encode_ring_lengths_for_mixed(
191 geom_types: &[GeometryType],
192 part_offsets: &[u32],
193 ring_offsets: &[u32],
194 has_line_string: bool,
195) -> Vec<u32> {
196 let mut lengths = Vec::new();
197 for (i, &geom_type) in geom_types.iter().enumerate() {
198 if geom_type.is_polygon() || (has_line_string && geom_type.is_linestring()) {
199 let s = part_offsets[i].as_usize();
200 let e = part_offsets[i + 1].as_usize();
201 extend_offsets(&mut lengths, &ring_offsets[s..=e]);
202 }
203 }
204 lengths
205}
206
207fn encode_level2_length_stream(
213 geom_types: &[GeometryType],
214 geom_offsets: &[u32],
215 part_offsets: &[u32],
216 ring_offsets: &[u32],
217) -> Vec<u32> {
218 let mut lengths = Vec::new();
219 let mut part_idx = 0;
220 let mut ring_idx = 0;
221
222 for (i, &geom_type) in geom_types.iter().enumerate() {
223 let count = (geom_offsets[i + 1] - geom_offsets[i]).as_usize();
224
225 if geom_type.is_polygon() {
229 for _ in 0..count {
231 let n = (part_offsets[part_idx + 1] - part_offsets[part_idx]).as_usize();
232 ring_idx += extend_offsets(&mut lengths, &ring_offsets[ring_idx..=ring_idx + n]);
233 part_idx += 1;
234 }
235 } else if geom_type.is_linestring() {
236 ring_idx += extend_offsets(&mut lengths, &ring_offsets[ring_idx..=ring_idx + count]);
238 }
239 }
241
242 lengths
243}
244
245fn encode_level1_without_ring_buffer_length_stream(
252 geom_types: &[GeometryType],
253 geom_offsets: &[u32],
254 part_offsets: &[u32],
255) -> Vec<u32> {
256 let mut lengths = Vec::new();
257 let mut part_idx = 0;
258
259 for (i, &geom_type) in geom_types.iter().enumerate() {
260 if geom_type.is_linestring() {
261 let n = (geom_offsets[i + 1] - geom_offsets[i]).as_usize();
262 part_idx += extend_offsets(&mut lengths, &part_offsets[part_idx..=part_idx + n]);
263 }
264 }
266
267 lengths
268}
269
270fn normalize_geometry_offsets(vector_types: &[GeometryType], geom_offsets: &[u32]) -> Vec<u32> {
272 let mut normalized = Vec::with_capacity(vector_types.len() + 1);
273 let mut offset = 0_u32;
274 let mut sparse_idx = 0_usize; for &geom_type in vector_types {
277 normalized.push(offset);
278
279 if geom_type.is_multi() {
280 if sparse_idx + 1 < geom_offsets.len() {
282 let start = geom_offsets[sparse_idx];
283 let end = geom_offsets[sparse_idx + 1];
284 offset += end - start;
285 sparse_idx += 1;
286 }
287 } else {
288 offset += 1;
290 }
291 }
292
293 normalized.push(offset);
294 normalized
295}
296
297fn normalize_part_offsets_for_rings(
309 vector_types: &[GeometryType],
310 part_offsets: &[u32],
311 ring_offsets: &[u32],
312) -> Vec<u32> {
313 let mut normalized = Vec::with_capacity(vector_types.len() + 1);
314 let mut ring_idx = 0_u32;
315 let mut part_idx = 0_usize;
316
317 for &geom_type in vector_types {
318 normalized.push(ring_idx);
319
320 if geom_type == Point {
321 } else if geom_type.is_linestring() {
323 ring_idx += 1;
325 } else if geom_type.is_polygon() && part_idx + 1 < part_offsets.len() {
326 let ring_count = part_offsets[part_idx + 1] - part_offsets[part_idx];
328 ring_idx += ring_count;
329 part_idx += 1;
330 }
331 }
333
334 debug_assert_eq!(
336 ring_idx as usize,
337 ring_offsets.len().saturating_sub(1),
338 "ring index mismatch after normalization"
339 );
340 normalized.push(ring_idx);
341 normalized
342}
343
344#[hotpath::measure]
353fn dict_may_be_beneficial(vertices: &[i32], enc: &Encoder) -> bool {
354 const MAXIMUM_UNIQUENESS_THRESHOLD_FOR_DICT: f64 = 0.66;
355
356 let coord_count = vertices.len() / 2;
357 if coord_count == 0 || enc.morton_cache.is_none() {
358 return false;
359 }
360
361 let mut hll = HyperLogLog::<Coord<i32>>::with_hasher(0.03, SipHasherBuilder::from_seed(0, 0));
362 for c in vertices.chunks_exact(2) {
363 hll.insert(&Coord::<i32> { x: c[0], y: c[1] });
364 }
365 #[expect(clippy::cast_precision_loss)]
366 let estimated_unique = hll.len().clamp(0.0, coord_count as f64);
367 #[expect(clippy::cast_precision_loss)]
368 let uniqueness_ratio = estimated_unique / coord_count as f64;
369 uniqueness_ratio < MAXIMUM_UNIQUENESS_THRESHOLD_FOR_DICT
370}
371
372fn get_morton(enc: &Encoder) -> Morton {
376 enc.morton_cache.expect(
377 "morton_cache populated by StagedLayer::encode_into; gated by dict_may_be_beneficial",
378 )
379}
380
381fn get_hilbert_params(enc: &Encoder) -> CurveParams {
383 enc.hilbert_cache
384 .expect("hilbert_cache populated by StagedLayer::encode_into")
385}
386
387fn encode_vec2_vertex_stream(
390 vertices: &[i32],
391 enc: &mut Encoder,
392 codecs: &mut Codecs,
393) -> MltResult<u8> {
394 let delta = encode_componentwise_delta_vec2s(vertices, &mut codecs.logical.u32_tmp);
395 let ctx = StreamCtx::geom(StreamType::Data(DictionaryType::Vertex), "vertex");
396 let logical = LogicalEncoding::ComponentwiseDelta;
397 write_geo_precomputed_stream(delta, ctx, logical, enc, &mut codecs.physical)
398}
399
400fn encode_morton_vertex_streams(
403 vertices: &[i32],
404 enc: &mut Encoder,
405 codecs: &mut Codecs,
406) -> MltResult<u8> {
407 let morton = get_morton(enc);
408 let (dict, offsets) = build_morton_dict(vertices, morton)?;
409 let mut n: u8 = 0;
410
411 let ctx = StreamCtx::geom(StreamType::Offset(OffsetType::Vertex), "vertex_offsets");
412 n += write_geo_u32_stream(&offsets, ctx, enc, codecs)?;
413
414 let delta = encode_morton_deltas(&dict, &mut codecs.logical.u32_tmp);
415 let ctx = StreamCtx::geom(StreamType::Data(DictionaryType::Morton), "vertex");
416 let logical = LogicalEncoding::MortonDelta(morton);
417 n += write_geo_precomputed_stream(delta, ctx, logical, enc, &mut codecs.physical)?;
418 Ok(n)
419}
420
/// Writes the Hilbert-dictionary vertex representation: a per-vertex offset
/// stream into the dictionary, then the dictionary coordinates themselves
/// (componentwise-delta encoded). Returns the number of streams written.
fn encode_hilbert_vertex_streams(
    vertices: &[i32],
    enc: &mut Encoder,
    codecs: &mut Codecs,
) -> MltResult<u8> {
    let params = get_hilbert_params(enc);
    let mut n: u8 = 0;

    // Temporarily take the codec scratch buffers so they can be handed to
    // build_hilbert_dict as independent &mut arguments; each is put back
    // below so its allocation is reused on the next call.
    let mut offsets = mem::take(&mut codecs.logical.hilbert_offsets);
    let mut indexed = mem::take(&mut codecs.logical.hilbert_indexed);
    let mut dict_xy = mem::take(&mut codecs.logical.hilbert_dict_xy);
    let mut remap = mem::take(&mut codecs.logical.hilbert_remap);

    build_hilbert_dict(
        vertices,
        params,
        &mut offsets,
        &mut indexed,
        &mut dict_xy,
        &mut remap,
    );
    // These two scratch buffers are no longer needed; return them now.
    codecs.logical.hilbert_indexed = indexed;
    codecs.logical.hilbert_remap = remap;

    // Stream 1: per-vertex offsets into the Hilbert dictionary.
    let ctx = StreamCtx::geom(StreamType::Offset(OffsetType::Vertex), "vertex_offsets");
    n += write_geo_u32_stream(&offsets, ctx, enc, codecs)?;

    // Stream 2: the dictionary coordinates. `offsets` has served its purpose
    // and is reused here as the output buffer for the delta encoding; its
    // contents after this call are the encoded dictionary, read back below.
    encode_componentwise_delta_vec2s(&dict_xy, &mut offsets);
    let ctx = StreamCtx::geom(StreamType::Data(DictionaryType::Vertex), "vertex");
    let logical = LogicalEncoding::ComponentwiseDelta;
    n += write_geo_precomputed_stream(&offsets, ctx, logical, enc, &mut codecs.physical)?;

    // Return the remaining scratch buffers for reuse.
    codecs.logical.hilbert_offsets = offsets;
    codecs.logical.hilbert_dict_xy = dict_xy;
    Ok(n)
}
466
467fn write_geo_u32_stream(
473 data: &[u32],
474 ctx: StreamCtx,
475 enc: &mut Encoder,
476 codecs: &mut Codecs,
477) -> MltResult<u8> {
478 Ok(if data.is_empty() && !enc.force_stream(&ctx) {
479 0
480 } else {
481 codecs.write_int_stream(data, &ctx, enc)?;
482 1
483 })
484}
485
/// Writes an already logically-encoded u32 stream, choosing the physical
/// encoding. Returns the number of streams written (0 or 1).
///
/// Selection order: an explicit per-stream override wins; a forced-but-empty
/// stream is written as metadata-only with `PhysicalEncoding::None`; otherwise
/// both FastPFor256 and VarInt are produced via the alternatives mechanism —
/// presumably the smaller result is kept; confirm against
/// `Encoder::try_alternatives`.
fn write_geo_precomputed_stream(
    data: &[u32],
    ctx: StreamCtx,
    logical: LogicalEncoding,
    enc: &mut Encoder,
    physical: &mut PhysicalCodecs,
) -> MltResult<u8> {
    use PhysicalEncoding as PE;

    Ok(if data.is_empty() && !enc.force_stream(&ctx) {
        // Nothing to write and the stream is not forced: emit no stream.
        0
    } else {
        if let Some(int_enc) = enc.override_int_enc(&ctx) {
            // Explicit override: use exactly the requested physical encoding.
            physical.write_encoded_as::<[u32]>(&ctx, enc, logical, data, int_enc.physical)?;
        } else if data.is_empty() {
            // Forced empty stream: metadata with no payload.
            let meta = StreamMeta::new2(ctx.stream_type, logical, PE::None, 0)?;
            write_stream_payload(&mut enc.data, meta, false, &[])?;
        } else {
            // Candidate 1: FastPFor256.
            let mut alt = enc.try_alternatives();
            alt.with(|enc| {
                let vals = physical.fastpfor(data)?;
                let meta = StreamMeta::new2(ctx.stream_type, logical, PE::FastPFor256, data.len())?;
                write_stream_payload(&mut enc.data, meta, false, vals)
            })?;
            // Candidate 2: VarInt.
            alt.with(|enc| {
                let vals = physical.varint(data);
                let meta = StreamMeta::new2(ctx.stream_type, logical, PE::VarInt, data.len())?;
                write_stream_payload(&mut enc.data, meta, false, vals)
            })?;
        }
        1
    })
}
523
impl GeometryValues {
    /// Encodes the geometry column into `enc`.
    ///
    /// Layout written: column type, a one-byte stream-count placeholder
    /// (patched at the end), the geometry-type meta stream, the length/offset
    /// streams demanded by the layer's geometry mix, optional triangle
    /// streams, and finally the vertex data — plain vec2, Morton dictionary,
    /// or Hilbert dictionary, whichever encodes smallest (or a forced layout).
    #[hotpath::measure]
    pub fn write_to(self, enc: &mut Encoder, codecs: &mut Codecs) -> MltResult<()> {
        let Self {
            vector_types,
            geometry_offsets,
            part_offsets,
            ring_offsets,
            index_buffer,
            triangles,
            vertices,
        } = self;

        // Missing buffers behave like empty ones from here on.
        let geom_offsets = geometry_offsets.unwrap_or_default();
        let part_offsets = part_offsets.unwrap_or_default();
        let ring_offsets = ring_offsets.unwrap_or_default();
        let index_buffer = index_buffer.unwrap_or_default();
        let triangles = triangles.unwrap_or_default();
        let vertices = vertices.unwrap_or_default();

        // Fill the per-encoder curve caches once; the vertex-stream encoders
        // below (and `dict_may_be_beneficial`) rely on them.
        if enc.hilbert_cache.is_none() {
            enc.hilbert_cache = Some(CurveParams::from_vertices(&vertices));
        }
        if enc.morton_cache.is_none() {
            let p = enc.hilbert_cache.expect("populated above");
            enc.morton_cache = Morton::new(p.bits, p.shift).ok();
        }

        // Meta stream: one raw geometry-type tag per feature.
        let meta: Vec<u32> = vector_types.iter().map(|t| *t as u32).collect();

        // When rings exist but there are no geometry offsets and
        // `part_offsets` is not already dense (one entry per geometry + 1),
        // expand it so every geometry gets a window into the ring buffer.
        let part_offsets = if geom_offsets.is_empty()
            && !ring_offsets.is_empty()
            && !part_offsets.is_empty()
            && part_offsets.len() != vector_types.len() + 1
        {
            normalize_part_offsets_for_rings(&vector_types, &part_offsets, &ring_offsets)
        } else {
            part_offsets
        };

        enc.write_column_type(ColumnType::Geometry)?;
        // Reserve one byte for the stream count; patched below once `n` is known.
        let stream_count_pos = enc.data.len();
        enc.data.push(0);
        let mut n: u8 = 0;

        let ctx = StreamCtx::geom(StreamType::Length(LengthType::VarBinary), "meta");
        codecs.write_int_stream(&meta, &ctx, enc)?;
        n += 1;

        if !geom_offsets.is_empty() {
            // Densify sparse geometry offsets if necessary.
            let geom_offsets = if geom_offsets.len() == vector_types.len() + 1 {
                geom_offsets
            } else {
                normalize_geometry_offsets(&vector_types, &geom_offsets)
            };
            let data = encode_root_length_stream(&vector_types, &geom_offsets, Polygon);
            let ctx = StreamCtx::geom(StreamType::Length(LengthType::Geometries), "geometries");
            n += write_geo_u32_stream(&data, ctx, enc, codecs)?;

            if !part_offsets.is_empty() {
                if ring_offsets.is_empty() {
                    // No polygons: only line-string part lengths at level 1.
                    let data = encode_level1_without_ring_buffer_length_stream(
                        &vector_types,
                        &geom_offsets,
                        &part_offsets,
                    );
                    let ctx = StreamCtx::geom(StreamType::Length(LengthType::Parts), "no_rings");
                    n += write_geo_u32_stream(&data, ctx, enc, codecs)?;
                } else {
                    // Polygons present: part lengths (level 1)...
                    let data = encode_level1_length_stream(
                        &vector_types,
                        &geom_offsets,
                        &part_offsets,
                        false,
                    );
                    let ctx = StreamCtx::geom(StreamType::Length(LengthType::Parts), "rings");
                    n += write_geo_u32_stream(&data, ctx, enc, codecs)?;

                    // ...and ring lengths (level 2).
                    let data = encode_level2_length_stream(
                        &vector_types,
                        &geom_offsets,
                        &part_offsets,
                        &ring_offsets,
                    );
                    let ctx = StreamCtx::geom(StreamType::Length(LengthType::Rings), "rings2");
                    n += write_geo_u32_stream(&data, ctx, enc, codecs)?;
                }
            }
        } else if !part_offsets.is_empty() {
            if ring_offsets.is_empty() {
                let data = encode_root_length_stream(&vector_types, &part_offsets, Point);
                let ctx = StreamCtx::geom(StreamType::Length(LengthType::Parts), "no_rings");
                n += write_geo_u32_stream(&data, ctx, enc, codecs)?;
            } else {
                // NOTE(review): an explicitly empty Geometries stream is
                // written here — presumably a decoder requirement when rings
                // exist without geometry offsets; confirm against the decoder.
                let ctx = StreamCtx::geom(StreamType::Length(LengthType::Geometries), "geometries");
                n += write_geo_u32_stream(&[], ctx, enc, codecs)?;

                let data = encode_root_length_stream(&vector_types, &part_offsets, LineString);
                let ctx = StreamCtx::geom(StreamType::Length(LengthType::Parts), "parts");
                n += write_geo_u32_stream(&data, ctx, enc, codecs)?;

                let has_line_string = vector_types
                    .iter()
                    .copied()
                    .any(GeometryType::is_linestring);
                let data = encode_ring_lengths_for_mixed(
                    &vector_types,
                    &part_offsets,
                    &ring_offsets,
                    has_line_string,
                );
                let ctx = StreamCtx::geom(StreamType::Length(LengthType::Rings), "parts_ring");
                n += write_geo_u32_stream(&data, ctx, enc, codecs)?;
            }
        }

        // Optional pre-tessellation streams.
        let ctx = StreamCtx::geom(StreamType::Length(LengthType::Triangles), "triangles");
        n += write_geo_u32_stream(&triangles, ctx, enc, codecs)?;
        let ctx = StreamCtx::geom(StreamType::Offset(OffsetType::Index), "triangles_indexes");
        n += write_geo_u32_stream(&index_buffer, ctx, enc, codecs)?;

        // Vertex data: forced layout wins; otherwise try the dictionary
        // candidates only when the uniqueness heuristic says it may pay off.
        if let Some(forced) = enc.override_vertex_buffer_type() {
            n += match forced {
                VertexBufferType::Vec2 => encode_vec2_vertex_stream(&vertices, enc, codecs)?,
                VertexBufferType::Morton => encode_morton_vertex_streams(&vertices, enc, codecs)?,
                VertexBufferType::Hilbert => encode_hilbert_vertex_streams(&vertices, enc, codecs)?,
            };
        } else if dict_may_be_beneficial(&vertices, enc) {
            // The alternatives mechanism keeps the best encoding; mirror its
            // size comparison here so `n` reflects the winner's stream count.
            let mut winner_size: usize = usize::MAX;
            let mut winner_stream_cnt: u8 = 0;
            let mut alt = enc.try_alternatives();
            alt.with(|e| {
                let ds = e.data.len();
                let ms = e.meta.len();
                winner_stream_cnt = encode_vec2_vertex_stream(&vertices, e, codecs)?;
                winner_size = (e.data.len() - ds) + (e.meta.len() - ms);
                Ok(())
            })?;
            alt.with(|e| {
                let ds = e.data.len();
                let ms = e.meta.len();
                let cnt = encode_hilbert_vertex_streams(&vertices, e, codecs)?;
                let size = (e.data.len() - ds) + (e.meta.len() - ms);
                if size < winner_size {
                    winner_stream_cnt = cnt;
                    winner_size = size;
                }
                Ok(())
            })?;
            alt.with(|e| {
                let ds = e.data.len();
                let ms = e.meta.len();
                let cnt = encode_morton_vertex_streams(&vertices, e, codecs)?;
                let size = (e.data.len() - ds) + (e.meta.len() - ms);
                // NOTE(review): `winner_size` is not updated in this branch.
                // Harmless because Morton is the last candidate, but it is
                // inconsistent with the Hilbert branch above.
                if size < winner_size {
                    winner_stream_cnt = cnt;
                }
                Ok(())
            })?;
            drop(alt);
            n += winner_stream_cnt;
        } else {
            n += encode_vec2_vertex_stream(&vertices, enc, codecs)?;
        }

        // NOTE(review): the limit is 127 rather than u8::MAX — presumably the
        // top bit of the count byte is reserved; confirm against the decoder.
        debug_assert!(n <= 127, "geometry stream count must fit in one byte");
        enc.data[stream_count_pos] = n;
        Ok(())
    }
}
723
/// Delta-encodes a sorted list of Morton `codes` into `buffer`: the first
/// code verbatim, then each code's difference to its predecessor.
///
/// The buffer is cleared first; an empty input yields an empty buffer.
fn encode_morton_deltas<'a>(codes: &[u32], buffer: &'a mut Vec<u32>) -> &'a mut Vec<u32> {
    buffer.clear();
    if codes.is_empty() {
        return buffer;
    }
    buffer.reserve(codes.len());
    let mut prev = codes[0];
    buffer.push(prev);
    for &code in &codes[1..] {
        buffer.push(code - prev);
        prev = code;
    }
    buffer
}
732
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_build_morton_dict() {
        let meta = Morton { bits: 4, shift: 0 };
        // (1,2) appears twice, plus (3,4) and (0,0).
        let vertices = [1, 2, 3, 4, 1, 2, 0, 0];
        let (dict, offsets) = build_morton_dict(&vertices, meta).unwrap();

        // Strictly increasing dictionary <=> sorted and deduplicated.
        assert!(
            dict.windows(2).all(|w| w[0] < w[1]),
            "dict not sorted/unique"
        );
        assert_eq!(offsets.len(), 4, "offsets length == number of vertex pairs");
        assert_eq!(offsets[0], offsets[2], "duplicate (1,2) should share index");
        assert!(offsets.iter().all(|&o| (o as usize) < dict.len()));
    }

    #[test]
    fn test_encode_root_length_stream() {
        // A single non-multi polygon does not rank above the Polygon id,
        // so no length entry is produced.
        let lengths = encode_root_length_stream(&[Polygon], &[0, 1], Polygon);
        assert!(lengths.is_empty());

        // A MultiPolygon with two children yields one length entry of 2.
        let lengths = encode_root_length_stream(&[GeometryType::MultiPolygon], &[0, 2], Polygon);
        assert_eq!(lengths, vec![2]);
    }
}