Skip to main content

geonative_shapefile/
shx.rs

1//! `.shx` random-access index for the companion `.shp`.
2//!
3//! ## Layout
4//!
5//! Same 100-byte header as `.shp` (file code, version, shape type, bbox),
6//! then fixed 8-byte records:
7//!
8//! | Bytes | Field |
9//! | --- | --- |
10//! | 0..4 | Offset in 16-bit words from `.shp` start (big-endian i32) |
11//! | 4..8 | Content length in 16-bit words (big-endian i32) |
12//!
13//! Both fields are **big-endian** — they're "file-management" data in
14//! Shapefile parlance. Byte offset = `offset_words * 2`. Without this file
15//! we'd have to scan `.shp` sequentially to find each record.
16//!
17//! ## Clever bit
18//!
//! We pre-convert words → bytes once on load so the iterator doesn't have
//! to multiply on every step. The index is parsed eagerly into a `Vec`
//! (small even for huge shapefiles — 8 bytes × N records on disk, 16 bytes
//! per `ShxRecord` in memory ≈ 160 MB at 10M features, still well within
//! RAM budget).
23
24use crate::bytes::Cursor;
25use crate::error::Result;
26use crate::header;
27
/// One `.shx` index entry, with both fields already converted from 16-bit
/// words to bytes.
///
/// Derives `PartialEq`/`Eq` so entries can be compared directly (for example
/// with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ShxRecord {
    /// Byte offset of the record header in `.shp`.
    pub offset_bytes: u64,
    /// Content length in bytes (excludes the 8-byte record header).
    pub content_len_bytes: u32,
}
35
36#[derive(Debug, Clone)]
37pub struct Shx {
38    pub header: header::ShpHeader,
39    pub records: Vec<ShxRecord>,
40}
41
42pub fn parse(bytes: &[u8]) -> Result<Shx> {
43    let h = header::parse(bytes)?;
44    let body = &bytes[header::SHP_HEADER_BYTES..];
45    let n = body.len() / 8;
46    let mut records = Vec::with_capacity(n);
47    let mut c = Cursor::new(body);
48    for _ in 0..n {
49        let offset_words = c.read_i32_be()?;
50        let len_words = c.read_i32_be()?;
51        records.push(ShxRecord {
52            offset_bytes: (offset_words as i64 * 2) as u64,
53            content_len_bytes: (len_words as i64 * 2) as u32,
54        });
55    }
56    Ok(Shx { header: h, records })
57}
58
#[cfg(test)]
mod tests {
    use super::*;
    use crate::header::{SHP_FILE_CODE, SHP_VERSION};

    /// Builds a synthetic `.shx` byte buffer: a 100-byte header followed by
    /// one 8-byte big-endian `(offset_words, len_words)` pair per entry.
    fn synth_shx(records: &[(i32, i32)]) -> Vec<u8> {
        // File length is stored in 16-bit words: 50 for the header plus
        // 4 per 8-byte record.
        let total_words = 50 + records.len() as i32 * 4;
        let mut buf = vec![0u8; 100];
        buf[0..4].copy_from_slice(&SHP_FILE_CODE.to_be_bytes());
        buf[24..28].copy_from_slice(&total_words.to_be_bytes());
        buf[28..32].copy_from_slice(&SHP_VERSION.to_le_bytes());
        buf[32..36].copy_from_slice(&1i32.to_le_bytes()); // shape type: Point
        // The bounding box stays zeroed; that's fine for these tests.
        for &(off, len) in records {
            buf.extend_from_slice(&off.to_be_bytes());
            buf.extend_from_slice(&len.to_be_bytes());
        }
        buf
    }

    #[test]
    fn parses_two_records() {
        let shx = parse(&synth_shx(&[(50, 10), (62, 10)])).unwrap();
        assert_eq!(shx.records.len(), 2);
        assert_eq!(shx.records[0].offset_bytes, 100);
        assert_eq!(shx.records[0].content_len_bytes, 20);
        assert_eq!(shx.records[1].offset_bytes, 124);
        assert_eq!(shx.records[1].content_len_bytes, 20);
    }

    #[test]
    fn header_only_index_has_no_records() {
        let shx = parse(&synth_shx(&[])).unwrap();
        assert!(shx.records.is_empty());
    }
}