Skip to main content

pgnumeric/
binary.rs

1// Copyright 2020 CoD Technologies Corp.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Numeric binary representation.
16
17// The Numeric type as stored on disk.
18//
19// If the high bits of the first word of a NumericChoice (n_header, or
20// n_short.n_header, or n_long.n_sign_dscale) are NUMERIC_SHORT, then the
21// numeric follows the NumericShort format; if they are NUMERIC_POS or
22// NUMERIC_NEG, it follows the NumericLong format.  If they are NUMERIC_NAN,
23// it is a NaN.  We currently always store a NaN using just two bytes (i.e.
24// only n_header), but previous releases used only the NumericLong format,
25// so we might find 4-byte NaNs on disk if a database has been migrated using
26// pg_upgrade.  In either case, when the high bits indicate a NaN, the
27// remaining bits are never examined.  Currently, we always initialize these
28// to zero, but it might be possible to use them for some other purpose in
29// the future.
30//
31// In the NumericShort format, the remaining 14 bits of the header word
32// (n_short.n_header) are allocated as follows: 1 for sign (positive or
33// negative), 6 for dynamic scale, and 7 for weight.  In practice, most
34// commonly-encountered values can be represented this way.
35//
36// In the NumericLong format, the remaining 14 bits of the header word
37// (n_long.n_sign_dscale) represent the display scale; and the weight is
38// stored separately in n_weight.
39//
40// NOTE: by convention, values in the packed form have been stripped of
41// all leading and trailing zero digits (where a "digit" is of base NBASE).
42// In particular, if the value is zero, there will be no digits at all!
43// The weight is arbitrary in that case, but we normally set it to zero.
44
45use crate::var::NumericVar;
46use cfg_if::cfg_if;
47use std::marker::PhantomData;
48use std::mem::size_of;
49
/// One digit of a numeric value (a digit is of base `NBASE`), stored as an `i16`.
pub type NumericDigit = i16;

/// Number of bytes occupied by a single `NumericDigit`.
pub const NUMERIC_DIGIT_SIZE: u32 = size_of::<NumericDigit>() as u32;

/// Number of bytes taken by the 4-byte varlena length word.
pub const VAR_HEADER_SIZE: i32 = size_of::<i32>() as i32;
/// Bytes preceding the digits in the long format:
/// length word + sign/dscale word + weight word.
pub const NUMERIC_HEADER_SIZE: usize = NUMERIC_HEADER_SIZE_SHORT + size_of::<i16>();
/// Bytes preceding the digits in the short format: length word + header word.
pub const NUMERIC_HEADER_SIZE_SHORT: usize = size_of::<u16>() + VAR_HEADER_SIZE as usize;
/// Long-format header size expressed in digits, rounded up.
pub const NUMERIC_HEADER_NDIGITS: u32 =
    (NUMERIC_HEADER_SIZE as u32 + (NUMERIC_DIGIT_SIZE - 1)) / NUMERIC_DIGIT_SIZE;

// Interpretation of the two high bits of the header word.
/// Mask selecting the two flag bits.
pub const NUMERIC_SIGN_MASK: u16 = 0xC000;
/// Flag bits of a positive value in the long format.
pub const NUMERIC_POS: u16 = 0x0000;
/// Flag bits of a negative value in the long format.
pub const NUMERIC_NEG: u16 = 0x4000;
/// Flag bits marking the short format.
pub const NUMERIC_SHORT: u16 = 0x8000;
/// Flag bits marking a NaN.
pub const NUMERIC_NAN: u16 = 0xC000;

// Short format: layout of the 14 bits below the flag bits.
/// Sign bit of a short-format value (set means negative).
const NUMERIC_SHORT_SIGN_MASK: u16 = 0x2000;
/// Bits holding the display scale of a short-format value.
const NUMERIC_SHORT_DSCALE_MASK: u16 = 0x1F80;
/// Position of the lowest display-scale bit.
const NUMERIC_SHORT_DSCALE_SHIFT: u16 = 7;
/// Largest display scale the short format can hold.
const NUMERIC_SHORT_DSCALE_MAX: u16 = NUMERIC_SHORT_DSCALE_MASK >> NUMERIC_SHORT_DSCALE_SHIFT;
/// Sign bit of the short-format weight.
const NUMERIC_SHORT_WEIGHT_SIGN_MASK: u16 = 0x0040;
/// Bits holding the low part of the short-format weight.
const NUMERIC_SHORT_WEIGHT_MASK: u16 = 0x003F;
/// Largest weight the short format can hold.
const NUMERIC_SHORT_WEIGHT_MAX: i16 = NUMERIC_SHORT_WEIGHT_MASK as i16;
/// Smallest weight the short format can hold.
const NUMERIC_SHORT_WEIGHT_MIN: i16 = -NUMERIC_SHORT_WEIGHT_MAX - 1;

/// Bits holding the display scale of a long-format value.
const NUMERIC_DSCALE_MASK: u16 = 0x3FFF;

/// Largest weight of a numeric.
pub const NUMERIC_WEIGHT_MAX: i16 = i16::max_value();
/// Smallest weight of a numeric.
pub const NUMERIC_WEIGHT_MIN: i16 = -NUMERIC_WEIGHT_MAX;
/// Largest display scale of a numeric.
pub const NUMERIC_DSCALE_MAX: i16 = NUMERIC_DSCALE_MASK as i16;
85
/// Zero-sized marker for a C-style flexible array member.
///
/// The marker stores no elements itself; it only records the position inside
/// a `#[repr(C)]` struct at which a run of `T` values starts.
#[repr(C)]
#[derive(Debug)]
struct FlexArray<T>(PhantomData<T>, [T; 0]);

impl<T> FlexArray<T> {
    /// Returns the address of this marker, typed as a pointer to the first element.
    #[inline]
    pub fn as_ptr(&self) -> *const T {
        let marker: *const Self = self;
        marker.cast::<T>()
    }

    /// Views the `len` elements that start at this marker as a slice.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that `len` initialized values of `T` are
    /// stored contiguously starting at the address of `self`, and that this
    /// memory stays valid for as long as the returned slice is used.
    #[inline]
    pub unsafe fn as_slice(&self, len: usize) -> &[T] {
        let first = self.as_ptr();
        std::slice::from_raw_parts(first, len)
    }
}
102
/// Zero-sized stand-in for one member of a C `union`.
///
/// The marker holds no data; `as_ref` / `as_mut` reinterpret whatever is
/// stored at the marker's own address as a `T`.
#[repr(C)]
#[derive(Debug)]
struct UnionField<T>(PhantomData<T>);

impl<T> UnionField<T> {
    /// Creates the marker.
    #[inline]
    pub const fn new() -> Self {
        UnionField(PhantomData)
    }

    /// Reinterprets the memory at this marker's address as a shared `T`.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that a valid, suitably aligned `T` is stored
    /// at the address of `self`.
    #[inline]
    pub unsafe fn as_ref(&self) -> &T {
        let here = (self as *const Self).cast::<T>();
        &*here
    }

    /// Reinterprets the memory at this marker's address as an exclusive `T`.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that a valid, suitably aligned `T` is stored
    /// at the address of `self` and may be written through the result.
    #[inline]
    pub unsafe fn as_mut(&mut self) -> &mut T {
        let here = (self as *mut Self).cast::<T>();
        &mut *here
    }
}
124
125#[repr(C)]
126pub(crate) struct NumericShort {
127    // Sign + display scale + weight
128    n_header: u16,
129
130    // Digits
131    n_data: FlexArray<NumericDigit>,
132}
133
134impl NumericShort {
135    #[inline]
136    pub fn set_header(&mut self, weight: i16, dscale: u16, sign: u16) {
137        if sign == NUMERIC_NAN {
138            debug_assert_eq!(weight, 0);
139            debug_assert_eq!(dscale, 0);
140            self.n_header = NUMERIC_NAN;
141            return;
142        }
143
144        let s = if sign == NUMERIC_NEG {
145            NUMERIC_SHORT | NUMERIC_SHORT_SIGN_MASK
146        } else {
147            NUMERIC_SHORT
148        };
149        let ds = dscale << NUMERIC_SHORT_DSCALE_SHIFT;
150        let ws = if weight < 0 {
151            NUMERIC_SHORT_WEIGHT_SIGN_MASK
152        } else {
153            0
154        };
155        let w = (weight & NUMERIC_SHORT_WEIGHT_MASK as i16) as u16;
156        self.n_header = s | ds | ws | w;
157    }
158}
159
160#[repr(C)]
161pub(crate) struct NumericLong {
162    // Sign + display scale
163    n_sign_dscale: u16,
164
165    // Weight of 1st digit
166    n_weight: i16,
167
168    // Digits
169    n_data: FlexArray<NumericDigit>,
170}
171
172impl NumericLong {
173    #[inline]
174    pub fn set_header(&mut self, weight: i16, dscale: u16, sign: u16) {
175        self.n_sign_dscale = sign | (dscale & NUMERIC_DSCALE_MASK);
176        self.n_weight = weight;
177    }
178}
179
180/// `NumericChoice` is a `union` in PostgreSQL.
181/// Here we use `UnionField` to simulate it.
182#[repr(C)]
183struct NumericChoice {
184    // Header word
185    n_header: UnionField<u16>,
186
187    // Long form (4-byte header)
188    n_long: UnionField<NumericLong>,
189
190    // Short form (2-byte header)
191    n_short: UnionField<NumericShort>,
192
193    // 4-byte header for union
194    _data: [u16; 2],
195}
196
197impl NumericChoice {
198    #[inline]
199    fn n_header(&self) -> u16 {
200        unsafe { *(self.n_header.as_ref()) }
201    }
202
203    #[inline]
204    pub fn flag_bits(&self) -> u16 {
205        self.n_header() & NUMERIC_SIGN_MASK
206    }
207
208    #[inline]
209    pub fn is_nan(&self) -> bool {
210        self.flag_bits() == NUMERIC_NAN
211    }
212
213    #[inline]
214    pub fn is_short(&self) -> bool {
215        self.flag_bits() == NUMERIC_SHORT
216    }
217
218    /// If the flag bits are `NUMERIC_SHORT` or `NUMERIC_NAN`, we want the short header;
219    /// otherwise, we want the long one.  Instead of testing against each value, we
220    /// can just look at the high bit, for a slight efficiency gain.
221    #[inline]
222    pub fn header_is_short(&self) -> bool {
223        (self.n_header() & 0x8000) != 0
224    }
225
226    #[inline]
227    pub fn header_size(&self) -> u32 {
228        if self.header_is_short() {
229            NUMERIC_HEADER_SIZE_SHORT as u32
230        } else {
231            NUMERIC_HEADER_SIZE as u32
232        }
233    }
234
235    #[inline]
236    pub fn sign(&self) -> u16 {
237        if self.is_short() {
238            if (self.n_header() & NUMERIC_SHORT_SIGN_MASK) != 0 {
239                NUMERIC_NEG
240            } else {
241                NUMERIC_POS
242            }
243        } else {
244            self.flag_bits()
245        }
246    }
247
248    #[inline]
249    pub fn dscale(&self) -> u16 {
250        unsafe {
251            if self.header_is_short() {
252                (self.n_short.as_ref().n_header & NUMERIC_SHORT_DSCALE_MASK)
253                    >> NUMERIC_SHORT_DSCALE_SHIFT
254            } else {
255                self.n_long.as_ref().n_sign_dscale & NUMERIC_DSCALE_MASK
256            }
257        }
258    }
259
260    #[inline]
261    fn weight_short(&self) -> i16 {
262        debug_assert!(self.header_is_short());
263        let weight_sign =
264            unsafe { self.n_short.as_ref().n_header & NUMERIC_SHORT_WEIGHT_SIGN_MASK };
265        let weight = unsafe { self.n_short.as_ref().n_header & NUMERIC_SHORT_WEIGHT_MASK };
266        if weight_sign != 0 {
267            ((!NUMERIC_SHORT_WEIGHT_MASK) | weight) as i16
268        } else {
269            weight as i16
270        }
271    }
272
273    #[inline]
274    fn weight_long(&self) -> i16 {
275        debug_assert!(!self.header_is_short());
276        unsafe { self.n_long.as_ref().n_weight }
277    }
278
279    #[inline]
280    pub fn weight(&self) -> i16 {
281        if self.header_is_short() {
282            self.weight_short()
283        } else {
284            self.weight_long()
285        }
286    }
287}
288
289/// `NumericBinary` is used to represent binary format of disk storage.
290/// Notes that do not create a `NumericBinary` directly.
291#[repr(C)]
292pub(crate) struct NumericBinary {
293    // varlena header (do not touch directly!)
294    // xxxxxx00 4-byte length word, aligned, uncompressed data (up to 1G)
295    // see also postgres.h
296    vl_len: u32,
297
298    // choice of format
299    choice: NumericChoice,
300}
301
302impl NumericBinary {
303    #[inline]
304    pub fn can_be_short(weight: i16, dscale: u16) -> bool {
305        dscale <= NUMERIC_SHORT_DSCALE_MAX
306            && weight <= NUMERIC_SHORT_WEIGHT_MAX
307            && weight >= NUMERIC_SHORT_WEIGHT_MIN
308    }
309
310    #[inline]
311    const fn encode_len(len: u32) -> u32 {
312        cfg_if! {
313            if #[cfg(feature = "big-endian-varlen")] {
314                (len & 0x3FFF_FFFF).to_be()
315            } else {
316                len << 2
317            }
318        }
319    }
320
321    #[inline]
322    const fn decode_len(len: u32) -> u32 {
323        cfg_if! {
324            if #[cfg(feature = "big-endian-varlen")] {
325                u32::from_be(len) & 0x3FFF_FFFF
326            } else {
327                (len >> 2) & 0x3FFF_FFFF
328            }
329        }
330    }
331
332    #[inline]
333    pub fn set_len(&mut self, len: u32) {
334        self.vl_len = NumericBinary::encode_len(len);
335    }
336
337    #[inline]
338    pub const fn len(&self) -> u32 {
339        NumericBinary::decode_len(self.vl_len)
340    }
341
342    #[allow(dead_code)]
343    #[inline]
344    pub fn is_short(&self) -> bool {
345        self.choice.is_short()
346    }
347
348    #[inline]
349    pub fn header_size(&self) -> u32 {
350        self.choice.header_size()
351    }
352
353    #[inline]
354    pub fn ndigits(&self) -> i32 {
355        ((self.len() - self.choice.header_size()) / NUMERIC_DIGIT_SIZE) as i32
356    }
357
358    #[inline]
359    pub fn sign(&self) -> u16 {
360        self.choice.sign()
361    }
362
363    #[inline]
364    pub fn dscale(&self) -> u16 {
365        self.choice.dscale()
366    }
367
368    #[inline]
369    pub fn weight(&self) -> i16 {
370        self.choice.weight()
371    }
372
373    #[inline]
374    pub fn is_nan(&self) -> bool {
375        self.choice.is_nan()
376    }
377
378    #[inline]
379    pub fn is_negative(&self) -> bool {
380        self.choice.sign() == NUMERIC_NEG
381    }
382
383    #[inline]
384    pub fn is_positive(&self) -> bool {
385        self.choice.sign() == NUMERIC_POS
386    }
387
388    #[inline]
389    pub fn long_mut(&mut self) -> &mut NumericLong {
390        unsafe { self.choice.n_long.as_mut() }
391    }
392
393    #[inline]
394    pub fn short_mut(&mut self) -> &mut NumericShort {
395        unsafe { self.choice.n_short.as_mut() }
396    }
397
398    #[inline]
399    pub const fn nan() -> NumericBinary {
400        NumericBinary {
401            vl_len: NumericBinary::encode_len(NUMERIC_HEADER_SIZE_SHORT as u32),
402            choice: NumericChoice {
403                n_header: UnionField::new(),
404                n_long: UnionField::new(),
405                n_short: UnionField::new(),
406                _data: [NUMERIC_NAN, 0],
407            },
408        }
409    }
410
411    #[inline]
412    pub const fn zero() -> NumericBinary {
413        NumericBinary {
414            vl_len: NumericBinary::encode_len(NUMERIC_HEADER_SIZE_SHORT as u32),
415            choice: NumericChoice {
416                n_header: UnionField::new(),
417                n_long: UnionField::new(),
418                n_short: UnionField::new(),
419                _data: [NUMERIC_SHORT, 0],
420            },
421        }
422    }
423
424    #[inline]
425    pub fn as_bytes(&self) -> &[u8] {
426        let len = self.len();
427        unsafe {
428            std::slice::from_raw_parts(self as *const NumericBinary as *const u8, len as usize)
429        }
430    }
431
432    #[inline]
433    pub fn as_var(&self) -> NumericVar {
434        let flat_bits = self.choice.flag_bits();
435        if flat_bits == NUMERIC_SHORT {
436            let len = self.len();
437            let ndigits = ((len - NUMERIC_HEADER_SIZE_SHORT as u32) / NUMERIC_DIGIT_SIZE) as i32;
438
439            let short = unsafe { self.choice.n_short.as_ref() };
440            let weight = {
441                let weight_sign = short.n_header & NUMERIC_SHORT_WEIGHT_SIGN_MASK;
442                let weight = short.n_header & NUMERIC_SHORT_WEIGHT_MASK;
443                if weight_sign != 0 {
444                    ((!NUMERIC_SHORT_WEIGHT_MASK) | weight) as i16
445                } else {
446                    weight as i16
447                }
448            };
449            let dscale = (short.n_header & NUMERIC_SHORT_DSCALE_MASK) >> NUMERIC_SHORT_DSCALE_SHIFT;
450            let sign = {
451                if (self.choice.n_header() & NUMERIC_SHORT_SIGN_MASK) != 0 {
452                    NUMERIC_NEG
453                } else {
454                    NUMERIC_POS
455                }
456            };
457            let digits = unsafe { short.n_data.as_slice(ndigits as usize) };
458
459            NumericVar::borrowed(ndigits, weight as i32, dscale as i32, sign, digits)
460        } else if flat_bits == NUMERIC_NAN {
461            NumericVar::borrowed(0, 0, 0, NUMERIC_NAN, &[])
462        } else {
463            let len = self.len();
464            let ndigits = ((len - NUMERIC_HEADER_SIZE as u32) / NUMERIC_DIGIT_SIZE) as i32;
465
466            let long = unsafe { self.choice.n_long.as_ref() };
467            let weight = long.n_weight;
468            let dscale = long.n_sign_dscale & NUMERIC_DSCALE_MASK;
469            let sign = flat_bits;
470            let digits = unsafe { long.n_data.as_slice(ndigits as usize) };
471
472            NumericVar::borrowed(ndigits, weight as i32, dscale as i32, sign, digits)
473        }
474    }
475}
476
#[cfg(test)]
mod tests {
    use super::*;
    use crate::data::NumericData;
    use std::mem;
    use std::mem::size_of;

    /// Pins the derived limits and checks that both header sizes are a whole
    /// number of digits.
    #[test]
    fn consts() {
        assert_eq!(NUMERIC_SHORT_DSCALE_MAX, 63);
        assert_eq!(NUMERIC_SHORT_WEIGHT_MAX, 63);
        assert_eq!(NUMERIC_SHORT_WEIGHT_MIN, -64);
        assert_eq!(NUMERIC_DSCALE_MAX, 16383);
        assert_eq!(NUMERIC_WEIGHT_MAX, 32767);
        assert_eq!(NUMERIC_WEIGHT_MIN, -32767);
        assert_eq!(NUMERIC_HEADER_SIZE % NUMERIC_DIGIT_SIZE as usize, 0);
        assert_eq!(NUMERIC_HEADER_SIZE_SHORT % NUMERIC_DIGIT_SIZE as usize, 0);
    }

    /// Checks where the long and short headers, and the digits after them,
    /// sit relative to the start of a `NumericData` buffer.
    #[test]
    fn binary() {
        let mut data = NumericData::with_ndigits(3);
        assert_eq!(data.len(), 3 + NUMERIC_HEADER_NDIGITS + 1);
        assert_eq!(data.offset(), NUMERIC_HEADER_NDIGITS + 1);

        let buf = data.as_mut_slice().as_ptr();
        let digits_start = unsafe { buf.add(data.offset() as usize) };

        // NOTE(review): `NumericBinary` contains a `u32`, while the pointers
        // below are derived from a digit buffer at 2-byte steps; confirm the
        // alignment guarantees before replacing `transmute` with a plain deref.

        // Long header: ends where the digits begin, starts one digit into the buffer.
        unsafe {
            let header_start = (digits_start as *mut u8).sub(NUMERIC_HEADER_SIZE);
            let bin: &mut NumericBinary = mem::transmute(header_start);
            assert_eq!(header_start, buf.add(1) as *mut u8);

            let long_ref = bin.long_mut();
            let long_addr = long_ref as *mut NumericLong as *mut u8;
            assert_eq!(header_start.add(size_of::<u32>()), long_addr);

            let long_digits = long_ref.n_data.as_ptr();
            assert_eq!(
                long_addr.add(size_of::<u16>() + size_of::<i16>()),
                long_digits as *mut u8
            );
        }

        // Short header: ends where the digits begin, starts two digits into the buffer.
        unsafe {
            let header_start = (digits_start as *mut u8).sub(NUMERIC_HEADER_SIZE_SHORT);
            let bin: &mut NumericBinary = mem::transmute(header_start);
            assert_eq!(header_start, buf.add(2) as *mut u8);

            let short_ref = bin.short_mut();
            let short_addr = short_ref as *mut NumericShort as *mut u8;
            assert_eq!(header_start.add(size_of::<u32>()), short_addr);

            let short_digits = short_ref.n_data.as_ptr();
            assert_eq!(short_addr.add(size_of::<u16>()), short_digits as *mut u8);
        }
    }
}