Skip to main content

minarrow/traits/
print.rs

1// Copyright 2025 Peter Garfield Bower
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! # **Print Module** - *Pretty Printing with Attitude*
16//!
17//! Contains implementations of the Display trait
18//! and an additional `Print` trait which wraps it to provide
19//! `myobj.print()` for any object that implements it.
20use std::fmt::{self, Display, Formatter};
21
22use crate::{Array, Buffer, Float, NumericArray, TextArray};
23#[cfg(feature = "datetime")]
24use crate::{DatetimeArray, Integer, TemporalArray};
25
/// Crate-visible preview limit (currently 50).
// NOTE(review): presumably the maximum number of rows/elements rendered when
// printing a preview; the code that consumes it is not visible here — confirm.
pub(crate) const MAX_PREVIEW: usize = 50;
27
/// # Print
///
/// Convenience trait that prints a value to standard output through its
/// `Display` implementation.
///
/// `myarr.print()` is shorthand for `println!("{}", myarr);`. The trait has
/// no required methods, and the blanket implementation below supplies it for
/// every `Display` type, so it never has to be implemented by hand.
pub trait Print {
    /// Writes `self`, followed by a newline, to stdout.
    ///
    /// Only callable when `Self: Display`.
    #[inline]
    fn print(&self)
    where
        Self: Display,
    {
        println!("{}", self);
    }
}

// Blanket implementation: anything that is `Display` can be `.print()`ed.
// The bound is stated once; repeating it in a `where` clause adds nothing.
impl<T: Display> Print for T {}
46
47// Helper functions
48
49pub(crate) fn value_to_string(arr: &Array, idx: usize) -> String {
50    // Null checks (handles absent mask too)
51    if let Some(mask) = arr.null_mask() {
52        if !mask.get(idx) {
53            return "null".into();
54        }
55    }
56    match arr {
57        // ------------------------- numeric ------------------------------
58        Array::NumericArray(inner) => match inner {
59            NumericArray::Int32(a) => a.data[idx].to_string(),
60            NumericArray::Int64(a) => a.data[idx].to_string(),
61            #[cfg(feature = "extended_numeric_types")]
62            NumericArray::Int8(a) => a.data[idx].to_string(),
63            #[cfg(feature = "extended_numeric_types")]
64            NumericArray::Int16(a) => a.data[idx].to_string(),
65            NumericArray::UInt32(a) => a.data[idx].to_string(),
66            NumericArray::UInt64(a) => a.data[idx].to_string(),
67            #[cfg(feature = "extended_numeric_types")]
68            NumericArray::UInt8(a) => a.data[idx].to_string(),
69            #[cfg(feature = "extended_numeric_types")]
70            NumericArray::UInt16(a) => a.data[idx].to_string(),
71            NumericArray::Float32(a) => format_float(a.data[idx] as f64),
72            NumericArray::Float64(a) => format_float(a.data[idx]),
73            NumericArray::Null => "null".into(),
74        },
75        // ------------------------- boolean ------------------------------
76        Array::BooleanArray(b) => {
77            let bit = b.data.get(idx);
78            bit.to_string()
79        }
80        // ------------------------- string / categorical -----------------
81        Array::TextArray(inner) => match inner {
82            TextArray::String32(s) => string_value(&s.offsets, &s.data, idx),
83            #[cfg(feature = "large_string")]
84            TextArray::String64(s) => string_value(&s.offsets, &s.data, idx),
85            #[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
86            TextArray::Categorical32(cat) => {
87                let key = cat.data[idx] as usize;
88                cat.unique_values()[key].clone()
89            }
90            #[cfg(feature = "default_categorical_8")]
91            TextArray::Categorical8(cat) => {
92                let key = cat.data[idx] as usize;
93                cat.unique_values()[key].clone()
94            }
95            #[cfg(feature = "extended_categorical")]
96            TextArray::Categorical16(cat) => {
97                let key = cat.data[idx] as usize;
98                cat.unique_values()[key].clone()
99            }
100            #[cfg(feature = "extended_categorical")]
101            TextArray::Categorical64(cat) => {
102                let key = cat.data[idx] as usize;
103                cat.unique_values()[key].clone()
104            }
105            TextArray::Null => "null".into(),
106        },
107        // ------------------------- datetime -----------------------------
108        #[cfg(feature = "datetime")]
109        Array::TemporalArray(inner) => match inner {
110            TemporalArray::Datetime32(dt) => format_datetime_value(dt, idx, None),
111            TemporalArray::Datetime64(dt) => format_datetime_value(dt, idx, None),
112            TemporalArray::Null => "null".into(),
113        },
114        // ------------------------- fallback -----------------------------
115        Array::Null => "null".into(),
116    }
117}
118
119fn string_value<T: Copy>(offsets: &Buffer<T>, data: &Buffer<u8>, idx: usize) -> String
120where
121    T: Copy + Into<u64>,
122{
123    // Convert to u64, then to usize (explicitly)
124    let start = offsets[idx].into() as usize;
125    let end = offsets[idx + 1].into() as usize;
126    let slice = &data[start..end];
127
128    // Safety: Arrow guarantees valid UTF-8 encoding
129    let s = unsafe { std::str::from_utf8_unchecked(slice) };
130    s.to_string()
131}
132
/// Writes one horizontal table rule such as `+-----+----+`, ending in a newline.
///
/// The first segment belongs to the index column (`idx_width`), followed by
/// one segment per entry in `col_widths`. Each segment is two dashes wider
/// than its column to cover the single space of padding on either side of a
/// cell.
pub(crate) fn print_rule(
    f: &mut Formatter<'_>,
    idx_width: usize,
    col_widths: &[usize],
) -> fmt::Result {
    f.write_str("+")?;
    // The index column is treated as just the first of the segments.
    for width in std::iter::once(idx_width).chain(col_widths.iter().copied()) {
        write!(f, "{}+", "-".repeat(width + 2))?;
    }
    writeln!(f)
}
144
/// Writes the table header row, ending in a newline.
///
/// The leading cell always reads `idx`, centred in `idx_width`. Each header
/// is then centred in the width at the same position in `col_widths`; the two
/// slices are zipped, so any surplus entries in the longer one are ignored.
pub(crate) fn print_header_row(
    f: &mut Formatter<'_>,
    idx_width: usize,
    headers: &[String],
    col_widths: &[usize],
) -> fmt::Result {
    write!(f, "| {:^idx_width$} |", "idx")?;
    headers
        .iter()
        .zip(col_widths)
        .try_for_each(|(name, &width)| write!(f, " {name:^width$} |"))?;
    writeln!(f)
}
157
/// Writes a row whose every cell is a centred `…`, ending in a newline.
///
/// The index cell uses `idx_width`; it is followed by one cell per entry in
/// `col_widths`.
pub(crate) fn print_ellipsis_row(
    f: &mut Formatter<'_>,
    idx_width: usize,
    col_widths: &[usize],
) -> fmt::Result {
    f.write_str("|")?;
    // Index column first, then the data columns, all rendered the same way.
    for width in std::iter::once(idx_width).chain(col_widths.iter().copied()) {
        write!(f, " {:^width$} |", "…")?;
    }
    writeln!(f)
}
169
170/// Formats floating point numbers:
171/// - Keeps up to 6 decimal digits
172/// - Trims trailing zeroes and unnecessary decimal point
173#[inline]
174pub(crate) fn format_float<T: Float + Display>(v: T) -> String {
175    let s = format!("{:.6}", v);
176    if s.contains('.') {
177        s.trim_end_matches('0').trim_end_matches('.').to_string()
178    } else {
179        s
180    }
181}
182
/// Renders the datetime stored at `idx` of `arr` as display text.
///
/// * A null slot renders as `"null"`.
/// * With the `datetime_ops` feature, the raw integer is interpreted
///   according to `arr.time_unit` and converted to a UTC
///   `time::OffsetDateTime`. If `timezone` is given the result is passed to
///   `format_with_timezone`; otherwise the UTC value is printed. When the
///   conversion is not possible (value out of the representable range), the
///   raw integer is printed with a unit suffix instead.
/// * Without `datetime_ops`, the raw integer is always printed with a unit
///   suffix (`s`, `ms`, `µs`, `ns` or `d`).
///
/// # Panics
///
/// * Without `datetime_ops`, panics if `timezone` is `Some`.
/// * With `datetime_ops`, panics if the stored value cannot be converted to
///   `i64`.
#[cfg(feature = "datetime")]
pub(crate) fn format_datetime_value<T>(
    arr: &DatetimeArray<T>,
    idx: usize,
    timezone: Option<&str>,
) -> String
where
    T: Integer + std::fmt::Display,
{
    use crate::{MaskedArray, TimeUnit};

    if arr.is_null(idx) {
        return "null".into();
    }

    // Fallback rendering shared by both feature configurations: the raw
    // stored integer followed by a short unit suffix.
    let raw_with_unit = || {
        let v = arr.data[idx];
        let suffix = match arr.time_unit {
            TimeUnit::Seconds => "s",
            TimeUnit::Milliseconds => "ms",
            TimeUnit::Microseconds => "µs",
            TimeUnit::Nanoseconds => "ns",
            TimeUnit::Days => "d",
        };
        format!("{v}{suffix}")
    };

    #[cfg(feature = "datetime_ops")]
    {
        use time::OffsetDateTime;

        let raw: i64 = arr.data[idx].to_i64().unwrap();

        // Sub-second units are scaled to nanoseconds in i128, which cannot
        // overflow for any i64 input.
        let utc_dt = match arr.time_unit {
            TimeUnit::Seconds => OffsetDateTime::from_unix_timestamp(raw).ok(),
            TimeUnit::Milliseconds => {
                OffsetDateTime::from_unix_timestamp_nanos(i128::from(raw) * 1_000_000).ok()
            }
            TimeUnit::Microseconds => {
                OffsetDateTime::from_unix_timestamp_nanos(i128::from(raw) * 1_000).ok()
            }
            TimeUnit::Nanoseconds => {
                OffsetDateTime::from_unix_timestamp_nanos(i128::from(raw)).ok()
            }
            TimeUnit::Days => {
                use crate::structs::variants::datetime::UNIX_EPOCH_JULIAN_DAY;
                // Explicit import so the checked narrowing below does not
                // depend on the edition's prelude.
                use std::convert::TryFrom;

                // Checked add and checked narrowing: an out-of-range day
                // count must fall back to the raw rendering rather than wrap
                // around into an unrelated (but valid-looking) date.
                raw.checked_add(UNIX_EPOCH_JULIAN_DAY)
                    .and_then(|julian| i32::try_from(julian).ok())
                    .and_then(|julian| time::Date::from_julian_day(julian).ok())
                    .and_then(|date| date.with_hms(0, 0, 0).ok())
                    .map(|midnight| midnight.assume_utc())
            }
        };

        match utc_dt {
            Some(dt) => match timezone {
                Some(tz) => format_with_timezone(dt, tz),
                None => dt.to_string(),
            },
            None => raw_with_unit(),
        }
    }
    #[cfg(not(feature = "datetime_ops"))]
    {
        if timezone.is_some() {
            panic!(
                "Timezone functionality requires the 'datetime_ops' feature. \
                Enable it in Cargo.toml with: features = [\"datetime_ops\"]"
            );
        }

        raw_with_unit()
    }
}
269
/// Formats `utc_dt` for display under the timezone label `tz`.
///
/// When `parse_timezone_offset` can turn `tz` into a fixed UTC offset, the
/// datetime is shifted to that offset before printing. When it cannot, the
/// datetime is printed unshifted (still in UTC). In both cases the original
/// `tz` text is appended after a space.
#[cfg(all(feature = "datetime", feature = "datetime_ops"))]
fn format_with_timezone(utc_dt: time::OffsetDateTime, tz: &str) -> String {
    let shown = match parse_timezone_offset(tz) {
        Some(offset) => utc_dt.to_offset(offset),
        // No usable offset: keep the UTC value and only label it.
        None => utc_dt,
    };
    format!("{shown} {tz}")
}
282
/// Resolves a timezone label to a fixed UTC offset.
///
/// `tz` is first passed through `lookup_timezone`; `None` from the lookup is
/// returned as `None`. The resolved text is then interpreted as:
///
/// * `UTC` or `Z` (case-insensitive) → `UtcOffset::UTC`
/// * `±HH:MM` or `±HHMM` → the corresponding offset
///
/// Hours and minutes must consist of ASCII digits only, and minutes must be
/// below 60. Anything else — including offsets that
/// `UtcOffset::from_whole_seconds` rejects — yields `None`. The function does
/// not panic on malformed or non-ASCII input.
#[cfg(all(feature = "datetime", feature = "datetime_ops"))]
fn parse_timezone_offset(tz: &str) -> Option<time::UtcOffset> {
    use crate::structs::variants::datetime::tz::lookup_timezone;
    use time::UtcOffset;

    // First try timezone database lookup (handles IANA IDs, abbreviations, and direct offsets)
    let resolved = lookup_timezone(tz)?;
    let offset_text = resolved.trim();

    if offset_text.eq_ignore_ascii_case("UTC") || offset_text.eq_ignore_ascii_case("Z") {
        return Some(UtcOffset::UTC);
    }

    // A numeric offset must start with an explicit sign.
    let (sign, rest) = if let Some(rest) = offset_text.strip_prefix('+') {
        (1_i32, rest)
    } else if let Some(rest) = offset_text.strip_prefix('-') {
        (-1_i32, rest)
    } else {
        return None;
    };

    // Accept "HH:MM", or exactly four ASCII characters "HHMM". The ASCII
    // check guarantees that splitting at byte 2 lands on a char boundary.
    let (hours_text, mins_text) = match rest.split_once(':') {
        Some(parts) => parts,
        None if rest.len() == 4 && rest.is_ascii() => rest.split_at(2),
        None => return None,
    };

    // Digits only: integer parsing on its own would also accept a leading
    // sign, letting inputs such as "+-05:00" through.
    let component = |text: &str| -> Option<i32> {
        if text.is_empty() || !text.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        text.parse::<u8>().ok().map(i32::from)
    };

    let hours = component(hours_text)?;
    let mins = component(mins_text)?;
    if mins >= 60 {
        return None;
    }

    // Range validation of the total offset is left to the `time` crate.
    UtcOffset::from_whole_seconds(sign * (hours * 3600 + mins * 60)).ok()
}