jsonb/
extension.rs

1// Copyright 2023 Datafuse Labs.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::cmp::Ordering;
16use std::fmt::Debug;
17use std::fmt::Display;
18use std::fmt::Formatter;
19
20use jiff::civil::date;
21use jiff::fmt::strtime;
22use jiff::tz::Offset;
23use jiff::SignedDuration;
24
/// Number of microseconds in one second.
const MICROS_PER_SEC: i64 = 1_000_000;
/// Number of microseconds in one minute.
const MICROS_PER_MINUTE: i64 = MICROS_PER_SEC * 60;
/// Number of microseconds in one hour.
const MICROS_PER_HOUR: i64 = MICROS_PER_MINUTE * 60;
/// Number of months in one year.
const MONTHS_PER_YEAR: i32 = 12;

/// Format string handed to `strtime::format` when displaying timestamps.
const TIMESTAMP_FORMAT: &str = "%Y-%m-%d %H:%M:%S%.6f";
31
32/// Represents extended JSON value types that are not supported in standard JSON.
33///
34/// Standard JSON only supports strings, numbers, booleans, null, arrays, and objects.
35/// This enum provides additional data types commonly needed in database systems and
36/// other applications that require more specialized data representations.
37#[derive(Debug, Clone)]
38pub enum ExtensionValue<'a> {
39    /// Binary data (byte array), allowing efficient storage of binary content
40    /// that would otherwise require base64 encoding in standard JSON
41    Binary(&'a [u8]),
42    /// Calendar date without time component (year, month, day)
43    Date(Date),
44    /// Timestamp with microsecond precision but without timezone information
45    Timestamp(Timestamp),
46    /// Timestamp with microsecond precision and timezone offset information
47    TimestampTz(TimestampTz),
48    /// Time interval representation for duration calculations
49    Interval(Interval),
50}
51
/// A calendar date (year, month, day) with no time-of-day component.
///
/// The date is stored as a signed number of days relative to the Unix epoch
/// (January 1, 1970), which keeps comparisons and date arithmetic cheap.
/// Standard JSON has no native date type and typically uses ISO 8601 strings.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Date {
    /// Days since the Unix epoch (January 1, 1970).
    /// Positive values are dates after the epoch, negative values are dates before it.
    pub value: i32,
}
63
/// A timestamp (date and time) that carries no timezone information.
///
/// The instant is stored as microseconds relative to the Unix epoch
/// (January 1, 1970 00:00:00 UTC), giving microsecond precision.
/// Standard JSON has no native timestamp type and typically uses ISO 8601 strings.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Timestamp {
    /// Microseconds since the Unix epoch (January 1, 1970 00:00:00 UTC).
    pub value: i64,
}
74
/// A timestamp paired with a timezone offset.
///
/// The timestamp itself is stored in UTC; the offset records the local timezone,
/// which allows timezone-aware datetime operations.
/// Standard JSON has no native timezone-aware timestamp type.
///
/// The derived comparisons follow field declaration order, so `offset` is
/// compared before `value`.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct TimestampTz {
    /// Timezone offset from UTC, in hours.
    pub offset: i8,
    /// Microseconds since the Unix epoch (January 1, 1970 00:00:00 UTC).
    pub value: i64,
}
88
/// A time interval (duration).
///
/// Months, days, and microseconds are kept as separate components so that
/// duration calculations can account for calendar irregularities.
/// Standard JSON has no native interval/duration type.
///
/// The derived comparisons follow field declaration order: `months`, then
/// `days`, then `micros`.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Interval {
    /// Month component of the interval.
    pub months: i32,
    /// Day component of the interval.
    pub days: i32,
    /// Microsecond component of the interval.
    pub micros: i64,
}
104
105impl Display for Date {
106    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
107        let dur = SignedDuration::from_hours(self.value as i64 * 24);
108        let date = date(1970, 1, 1).checked_add(dur).unwrap();
109        write!(f, "{}", date)
110    }
111}
112
113impl Display for Timestamp {
114    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
115        let micros = self.value;
116        let (mut secs, mut nanos) = (micros / MICROS_PER_SEC, (micros % MICROS_PER_SEC) * 1_000);
117        if nanos < 0 {
118            secs -= 1;
119            nanos += 1_000_000_000;
120        }
121
122        if secs > 253402207200 {
123            secs = 253402207200;
124            nanos = 0;
125        } else if secs < -377705023201 {
126            secs = -377705023201;
127            nanos = 0;
128        }
129        let ts = jiff::Timestamp::new(secs, nanos as i32).unwrap();
130
131        write!(f, "{}", strtime::format(TIMESTAMP_FORMAT, ts).unwrap())
132    }
133}
134
135impl Display for TimestampTz {
136    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
137        let micros = self.value;
138        let (mut secs, mut nanos) = (micros / MICROS_PER_SEC, (micros % MICROS_PER_SEC) * 1_000);
139        if nanos < 0 {
140            secs -= 1;
141            nanos += 1_000_000_000;
142        }
143
144        if secs > 253402207200 {
145            secs = 253402207200;
146            nanos = 0;
147        } else if secs < -377705023201 {
148            secs = -377705023201;
149            nanos = 0;
150        }
151        let ts = jiff::Timestamp::new(secs, nanos as i32).unwrap();
152        let tz = Offset::constant(self.offset).to_time_zone();
153        let zoned = ts.to_zoned(tz);
154
155        write!(f, "{}", strtime::format(TIMESTAMP_FORMAT, &zoned).unwrap())
156    }
157}
158
159impl Display for Interval {
160    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
161        let mut date_parts = vec![];
162        let years = self.months / MONTHS_PER_YEAR;
163        let months = self.months % MONTHS_PER_YEAR;
164        match years.cmp(&1) {
165            Ordering::Equal => {
166                date_parts.push((years, "year"));
167            }
168            Ordering::Greater => {
169                date_parts.push((years, "years"));
170            }
171            _ => {}
172        }
173        match months.cmp(&1) {
174            Ordering::Equal => {
175                date_parts.push((months, "month"));
176            }
177            Ordering::Greater => {
178                date_parts.push((months, "months"));
179            }
180            _ => {}
181        }
182        match self.days.cmp(&1) {
183            Ordering::Equal => {
184                date_parts.push((self.days, "day"));
185            }
186            Ordering::Greater => {
187                date_parts.push((self.days, "days"));
188            }
189            _ => {}
190        }
191        if !date_parts.is_empty() {
192            for (i, (val, name)) in date_parts.into_iter().enumerate() {
193                if i > 0 {
194                    write!(f, " ")?;
195                }
196                write!(f, "{} {}", val, name)?;
197            }
198            if self.micros != 0 {
199                write!(f, " ")?;
200            }
201        }
202
203        if self.micros != 0 {
204            let mut micros = self.micros;
205            if micros < 0 {
206                write!(f, "-")?;
207                micros = -micros;
208            }
209            let hour = micros / MICROS_PER_HOUR;
210            micros -= hour * MICROS_PER_HOUR;
211            let min = micros / MICROS_PER_MINUTE;
212            micros -= min * MICROS_PER_MINUTE;
213            let sec = micros / MICROS_PER_SEC;
214            micros -= sec * MICROS_PER_SEC;
215
216            if hour < 100 {
217                write!(f, "{:02}:{:02}:{:02}", hour, min, sec)?;
218            } else {
219                write!(f, "{}:{:02}:{:02}", hour, min, sec)?;
220            }
221            if micros != 0 {
222                write!(f, ".{:06}", micros)?;
223            }
224        } else if self.months == 0 && self.days == 0 {
225            write!(f, "00:00:00")?;
226        }
227        Ok(())
228    }
229}
230
231impl Display for ExtensionValue<'_> {
232    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
233        match self {
234            ExtensionValue::Binary(v) => {
235                for c in *v {
236                    write!(f, "{c:02X}")?;
237                }
238                Ok(())
239            }
240            ExtensionValue::Date(v) => write!(f, "{}", v),
241            ExtensionValue::Timestamp(v) => write!(f, "{}", v),
242            ExtensionValue::TimestampTz(v) => write!(f, "{}", v),
243            ExtensionValue::Interval(v) => write!(f, "{}", v),
244        }
245    }
246}
247
248impl Eq for ExtensionValue<'_> {}
249
250impl PartialEq for ExtensionValue<'_> {
251    fn eq(&self, other: &Self) -> bool {
252        self.partial_cmp(other) == Some(Ordering::Equal)
253    }
254}
255
256#[allow(clippy::non_canonical_partial_ord_impl)]
257impl PartialOrd for ExtensionValue<'_> {
258    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
259        let self_level = match self {
260            ExtensionValue::Binary(_) => 0,
261            ExtensionValue::Date(_) => 1,
262            ExtensionValue::Timestamp(_) => 2,
263            ExtensionValue::TimestampTz(_) => 3,
264            ExtensionValue::Interval(_) => 4,
265        };
266        let other_level = match other {
267            ExtensionValue::Binary(_) => 0,
268            ExtensionValue::Date(_) => 1,
269            ExtensionValue::Timestamp(_) => 2,
270            ExtensionValue::TimestampTz(_) => 3,
271            ExtensionValue::Interval(_) => 4,
272        };
273        let res = self_level.cmp(&other_level);
274        if matches!(res, Ordering::Greater | Ordering::Less) {
275            return Some(res);
276        }
277
278        match (self, other) {
279            (ExtensionValue::Binary(self_data), ExtensionValue::Binary(other_data)) => {
280                Some(self_data.cmp(other_data))
281            }
282            (ExtensionValue::Date(self_data), ExtensionValue::Date(other_data)) => {
283                Some(self_data.cmp(other_data))
284            }
285            (ExtensionValue::Timestamp(self_data), ExtensionValue::Timestamp(other_data)) => {
286                Some(self_data.cmp(other_data))
287            }
288            (ExtensionValue::TimestampTz(self_data), ExtensionValue::TimestampTz(other_data)) => {
289                Some(self_data.cmp(other_data))
290            }
291            (ExtensionValue::Interval(self_data), ExtensionValue::Interval(other_data)) => {
292                Some(self_data.cmp(other_data))
293            }
294            (_, _) => None,
295        }
296    }
297}