oxigdal_postgis/copy_binary.rs
1//! PostgreSQL binary COPY payload encoder (PostgreSQL §53.2 — "Binary Format").
2//!
3//! Produces the byte stream consumed by `COPY ... FROM STDIN WITH (FORMAT binary)`.
4//! All integers are big-endian. Fully unit-testable without a database.
5//!
6//! # Binary COPY stream layout
7//!
8//! A binary-COPY stream is structured as:
9//!
10//! ```text
11//! +------------------------------------------------------------------+
12//! | File header |
13//! | signature : 11 bytes = "PGCOPY\n\xff\r\n\0" |
14//! | flags : i32 BE = 0 (no OID column) |
15//! | header ext len : i32 BE = 0 (no header extension) |
16//! +------------------------------------------------------------------+
17//! | Tuple 0 |
18//! | field count : i16 BE = number of fields in the row |
19//! | field 0 : i32 BE length prefix + that many raw bytes, |
20//! | or i32 BE -1 to signal SQL NULL |
21//! | field 1 : ... |
22//! +------------------------------------------------------------------+
23//! | Tuple 1 ... |
24//! +------------------------------------------------------------------+
25//! | File trailer |
26//! | field count : i16 BE = -1 (end-of-data marker) |
27//! +------------------------------------------------------------------+
28//! ```
29//!
30//! For a non-null field the i32 length prefix counts only the field body, not
31//! the prefix itself. A length prefix of `-1` denotes SQL `NULL` and is *not*
32//! followed by any bytes.
33
34use crate::error::{Result, WkbError};
35
/// Magic signature that begins every binary-COPY stream.
///
/// Spelled out byte by byte: the ASCII text `PGCOPY`, then `\n`, `0xFF`, `\r`,
/// `\n` and a terminating NUL — 11 bytes in total.
pub const COPY_BINARY_SIGNATURE: [u8; 11] = [
    b'P', b'G', b'C', b'O', b'P', b'Y', b'\n', 0xFF, b'\r', b'\n', 0x00,
];
38
/// Bit 29 (`0x2000_0000`) of the EWKB geometry-type word: the "SRID present" marker.
///
/// When PostGIS Extended WKB carries an SRID, this bit is set on the 32-bit
/// geometry-type word and an `i32` SRID follows that word directly.
const EWKB_SRID_FLAG: u32 = 1 << 29;
44
/// Incremental encoder for a binary-COPY payload.
///
/// The encoder owns a growing byte buffer. Construction emits the file header;
/// [`begin_row`](CopyBinaryEncoder::begin_row), the `write_*` methods and
/// [`finish`](CopyBinaryEncoder::finish) append tuples and the trailer.
///
/// The type is `Debug` so it can appear in diagnostics and in `assert!`
/// output, and `Clone` so a partially built payload can be snapshotted.
#[derive(Debug, Clone)]
pub struct CopyBinaryEncoder {
    /// Bytes emitted so far: the file header followed by any encoded tuples.
    buf: Vec<u8>,
}
53
54impl CopyBinaryEncoder {
55 /// New encoder — emits signature + 4-byte flags(0) + 4-byte header extension length(0).
56 pub fn new() -> Self {
57 let mut buf = Vec::new();
58 buf.extend_from_slice(©_BINARY_SIGNATURE);
59 buf.extend_from_slice(&0i32.to_be_bytes()); // flags
60 buf.extend_from_slice(&0i32.to_be_bytes()); // header extension length
61 Self { buf }
62 }
63
64 /// Start a row with `field_count` fields (i16 BE).
65 pub fn begin_row(&mut self, field_count: i16) {
66 self.buf.extend_from_slice(&field_count.to_be_bytes());
67 }
68
69 /// Write a non-null field: i32 BE length prefix + raw bytes.
70 pub fn write_field_bytes(&mut self, bytes: &[u8]) {
71 self.buf
72 .extend_from_slice(&(bytes.len() as i32).to_be_bytes());
73 self.buf.extend_from_slice(bytes);
74 }
75
76 /// Write a NULL field: i32 BE -1.
77 pub fn write_null(&mut self) {
78 self.buf.extend_from_slice(&(-1i32).to_be_bytes());
79 }
80
81 /// Finish: append i16 BE -1 trailer, return the payload.
82 pub fn finish(mut self) -> Vec<u8> {
83 self.buf.extend_from_slice(&(-1i16).to_be_bytes());
84 self.buf
85 }
86
87 /// Current length of the in-progress payload in bytes.
88 pub fn len(&self) -> usize {
89 self.buf.len()
90 }
91
92 /// Returns `true` when the underlying buffer is empty.
93 ///
94 /// After [`new`](CopyBinaryEncoder::new) this is always `false` because the
95 /// constructor has already emitted the 19-byte file header.
96 pub fn is_empty(&self) -> bool {
97 self.buf.is_empty()
98 }
99}
100
101impl Default for CopyBinaryEncoder {
102 fn default() -> Self {
103 Self::new()
104 }
105}
106
107/// Convert plain WKB to EWKB by setting the SRID flag bit on the geometry-type
108/// word and inserting the i32 SRID. Returns EWKB bytes suitable for PostGIS COPY.
109///
110/// `wkb` must be standard WKB:
111///
112/// ```text
113/// byte 0 : byte order (0x00 = big-endian, 0x01 = little-endian)
114/// bytes 1..5 : geometry type, u32 in the declared byte order
115/// bytes 5.. : geometry body
116/// ```
117///
118/// The produced EWKB sets bit `0x20000000` on the geometry-type word and splices
119/// a 4-byte SRID (encoded in the same byte order) immediately after the type
120/// word:
121///
122/// ```text
123/// byte 0 : byte order (unchanged)
124/// bytes 1..5 : geometry type | 0x20000000, u32 in the declared byte order
125/// bytes 5..9 : SRID, i32 in the declared byte order
126/// bytes 9.. : geometry body (unchanged)
127/// ```
128///
129/// If the input type word already has `0x20000000` set the WKB is assumed to be
130/// EWKB already and is returned verbatim.
131///
132/// # Errors
133///
134/// Returns [`WkbError::BufferTooShort`] when `wkb` has fewer than 5 bytes (it
135/// cannot contain a byte-order byte plus a 4-byte type word), and
136/// [`WkbError::InvalidByteOrder`] when byte 0 is neither `0x00` nor `0x01`.
137pub fn ewkb_from_wkb(wkb: &[u8], srid: i32) -> Result<Vec<u8>> {
138 // A valid WKB header is 1 byte (byte order) + 4 bytes (geometry type).
139 if wkb.len() < 5 {
140 return Err(WkbError::BufferTooShort {
141 expected: 5,
142 actual: wkb.len(),
143 }
144 .into());
145 }
146
147 let order_byte = wkb[0];
148 let little_endian = match order_byte {
149 0x00 => false,
150 0x01 => true,
151 other => return Err(WkbError::InvalidByteOrder { byte: other }.into()),
152 };
153
154 // Read the 4-byte geometry-type word in the declared byte order.
155 let type_bytes: [u8; 4] = [wkb[1], wkb[2], wkb[3], wkb[4]];
156 let type_word = if little_endian {
157 u32::from_le_bytes(type_bytes)
158 } else {
159 u32::from_be_bytes(type_bytes)
160 };
161
162 // Already EWKB: the SRID flag is set, so return the input untouched.
163 if type_word & EWKB_SRID_FLAG != 0 {
164 return Ok(wkb.to_vec());
165 }
166
167 // Set the SRID flag and re-encode the type word in the same byte order.
168 let ewkb_type = type_word | EWKB_SRID_FLAG;
169 let ewkb_type_bytes = if little_endian {
170 ewkb_type.to_le_bytes()
171 } else {
172 ewkb_type.to_be_bytes()
173 };
174 let srid_bytes = if little_endian {
175 srid.to_le_bytes()
176 } else {
177 srid.to_be_bytes()
178 };
179
180 // Assemble: order byte + flagged type word + SRID + original body.
181 let mut out = Vec::with_capacity(wkb.len() + 4);
182 out.push(order_byte);
183 out.extend_from_slice(&ewkb_type_bytes);
184 out.extend_from_slice(&srid_bytes);
185 out.extend_from_slice(&wkb[5..]);
186 Ok(out)
187}
188
#[cfg(test)]
mod tests {
    //! Unit tests for the binary-COPY encoder and the WKB→EWKB conversion.
    //! None of them needs a database.

    use super::*;

    /// Length of the file header: 11-byte signature + 4-byte flags + 4-byte
    /// header extension length.
    const HEADER_LEN: usize = 19;

    #[test]
    fn test_new_emits_header() {
        let encoder = CopyBinaryEncoder::new();
        assert_eq!(encoder.len(), HEADER_LEN);
        assert!(!encoder.is_empty());
    }

    #[test]
    fn test_finish_minimal_stream() {
        let encoder = CopyBinaryEncoder::new();
        let payload = encoder.finish();
        // 19-byte header + 2-byte trailer.
        assert_eq!(payload.len(), HEADER_LEN + 2);
        // Header content: signature, then zeroed flags and extension length.
        assert_eq!(&payload[..11], &COPY_BINARY_SIGNATURE[..]);
        assert_eq!(&payload[11..HEADER_LEN], &[0u8; 8][..]);
        // Trailer: i16 BE -1.
        assert_eq!(&payload[payload.len() - 2..], &[0xFF, 0xFF]);
    }

    #[test]
    fn test_row_with_value_and_null() {
        let mut encoder = CopyBinaryEncoder::new();
        encoder.begin_row(2);
        encoder.write_field_bytes(&[0xAA, 0xBB, 0xCC]);
        encoder.write_null();
        let payload = encoder.finish();
        let expected: Vec<u8> = vec![
            0x00, 0x02, // field count = 2
            0x00, 0x00, 0x00, 0x03, 0xAA, 0xBB, 0xCC, // field 0: length 3 + body
            0xFF, 0xFF, 0xFF, 0xFF, // field 1: NULL (length -1, no body)
            0xFF, 0xFF, // trailer
        ];
        assert_eq!(&payload[HEADER_LEN..], &expected[..]);
    }

    #[test]
    fn test_default_matches_new() {
        let from_default = CopyBinaryEncoder::default().finish();
        let from_new = CopyBinaryEncoder::new().finish();
        assert_eq!(from_default, from_new);
    }

    #[test]
    fn test_ewkb_rejects_short_buffer() {
        let result = ewkb_from_wkb(&[0x01, 0x01, 0x00], 4326);
        assert!(result.is_err());
    }

    #[test]
    fn test_ewkb_rejects_bad_byte_order() {
        // Byte 0 = 0x02 is neither big- nor little-endian.
        let result = ewkb_from_wkb(&[0x02, 0x01, 0x00, 0x00, 0x00], 4326);
        assert!(result.is_err());
    }

    #[test]
    fn test_ewkb_little_endian_inserts_flag_and_srid() {
        let wkb: Vec<u8> = vec![
            0x01, // little-endian
            0x01, 0x00, 0x00, 0x00, // type = Point
            0xA1, 0xA2, 0xA3, // body (opaque to the converter)
        ];
        let expected: Vec<u8> = vec![
            0x01, // byte order unchanged
            0x01, 0x00, 0x00, 0x20, // type = Point | 0x20000000 (LE)
            0xE6, 0x10, 0x00, 0x00, // SRID 4326 (LE)
            0xA1, 0xA2, 0xA3, // body unchanged
        ];
        let out = ewkb_from_wkb(&wkb, 4326).expect("ewkb conversion failed");
        assert_eq!(out, expected);
    }

    #[test]
    fn test_ewkb_big_endian_inserts_flag_and_srid() {
        let wkb: Vec<u8> = vec![
            0x00, // big-endian
            0x00, 0x00, 0x00, 0x01, // type = Point
            0xB1, 0xB2, // body (opaque to the converter)
        ];
        let expected: Vec<u8> = vec![
            0x00, // byte order unchanged
            0x20, 0x00, 0x00, 0x01, // type = Point | 0x20000000 (BE)
            0x00, 0x00, 0x10, 0xE6, // SRID 4326 (BE)
            0xB1, 0xB2, // body unchanged
        ];
        let out = ewkb_from_wkb(&wkb, 4326).expect("ewkb conversion failed");
        assert_eq!(out, expected);
    }

    #[test]
    fn test_ewkb_idempotent_when_already_ewkb() {
        // Little-endian Point type word already carrying the SRID flag
        // (0x20000001) followed by an existing SRID.
        let ewkb = [
            0x01, // little-endian
            0x01, 0x00, 0x00, 0x20, // type = Point | 0x20000000
            0xE6, 0x10, 0x00, 0x00, // SRID 4326
        ];
        // The requested SRID (3857) is ignored: the input comes back verbatim.
        let out = ewkb_from_wkb(&ewkb, 3857).expect("ewkb conversion failed");
        assert_eq!(out, ewkb);
    }
}
235}