minarrow/traits/
print.rs

1//! # **Print Module** - *Pretty Printing with Attitude*
2//!
3//! Contains implementations of the Display trait
4//! and an additional `Print` trait which wraps it to provide
5//! `myobj.print()` for any object that implements it.
6use std::fmt::{self, Display, Formatter};
7
8use crate::{Array, Buffer, Float, NumericArray, TextArray};
9#[cfg(feature = "datetime")]
10use crate::{DatetimeArray, Integer, TemporalArray};
11
/// Crate-visible preview limit (50).
///
/// NOTE(review): presumably the maximum number of rows/elements rendered
/// before output is elided — confirm against the `Display` impls that use it.
pub(crate) const MAX_PREVIEW: usize = 50;
13
/// # Print
///
/// Convenience trait that sends a value's `Display` rendering to stdout.
///
/// It lets callers write `myarr.print()` in place of
/// `println!("{}", myarr);` for any type that implements `Display`.
pub trait Print {
    /// Writes `self`, formatted through `Display`, to standard output
    /// followed by a newline.
    #[inline]
    fn print(&self)
    where
        Self: Display,
    {
        println!("{self}");
    }
}
30
31impl<T: Display> Print for T where T: Display {}
32
33// Helper functions
34
35pub(crate) fn value_to_string(arr: &Array, idx: usize) -> String {
36    // Null checks (handles absent mask too)
37    if let Some(mask) = arr.null_mask() {
38        if !mask.get(idx) {
39            return "null".into();
40        }
41    }
42    match arr {
43        // ------------------------- numeric ------------------------------
44        Array::NumericArray(inner) => match inner {
45            NumericArray::Int32(a) => a.data[idx].to_string(),
46            NumericArray::Int64(a) => a.data[idx].to_string(),
47            #[cfg(feature = "extended_numeric_types")]
48            NumericArray::Int8(a) => a.data[idx].to_string(),
49            #[cfg(feature = "extended_numeric_types")]
50            NumericArray::Int16(a) => a.data[idx].to_string(),
51            NumericArray::UInt32(a) => a.data[idx].to_string(),
52            NumericArray::UInt64(a) => a.data[idx].to_string(),
53            #[cfg(feature = "extended_numeric_types")]
54            NumericArray::UInt8(a) => a.data[idx].to_string(),
55            #[cfg(feature = "extended_numeric_types")]
56            NumericArray::UInt16(a) => a.data[idx].to_string(),
57            NumericArray::Float32(a) => format_float(a.data[idx] as f64),
58            NumericArray::Float64(a) => format_float(a.data[idx]),
59            NumericArray::Null => "null".into(),
60        },
61        // ------------------------- boolean ------------------------------
62        Array::BooleanArray(b) => {
63            let bit = b.data.get(idx);
64            bit.to_string()
65        }
66        // ------------------------- string / categorical -----------------
67        Array::TextArray(inner) => match inner {
68            TextArray::String32(s) => string_value(&s.offsets, &s.data, idx),
69            #[cfg(feature = "large_string")]
70            TextArray::String64(s) => string_value(&s.offsets, &s.data, idx),
71            TextArray::Categorical32(cat) => {
72                let key = cat.data[idx] as usize;
73                cat.unique_values[key].clone()
74            }
75            #[cfg(feature = "extended_categorical")]
76            TextArray::Categorical8(cat) => {
77                let key = cat.data[idx] as usize;
78                cat.unique_values[key].clone()
79            }
80            #[cfg(feature = "extended_categorical")]
81            TextArray::Categorical16(cat) => {
82                let key = cat.data[idx] as usize;
83                cat.unique_values[key].clone()
84            }
85            #[cfg(feature = "extended_categorical")]
86            TextArray::Categorical64(cat) => {
87                let key = cat.data[idx] as usize;
88                cat.unique_values[key].clone()
89            }
90            TextArray::Null => "null".into(),
91        },
92        // ------------------------- datetime -----------------------------
93        #[cfg(feature = "datetime")]
94        Array::TemporalArray(inner) => match inner {
95            TemporalArray::Datetime32(dt) => format_datetime_value(dt, idx, None),
96            TemporalArray::Datetime64(dt) => format_datetime_value(dt, idx, None),
97            TemporalArray::Null => "null".into(),
98        },
99        // ------------------------- fallback -----------------------------
100        Array::Null => "null".into(),
101    }
102}
103
104fn string_value<T: Copy>(offsets: &Buffer<T>, data: &Buffer<u8>, idx: usize) -> String
105where
106    T: Copy + Into<u64>,
107{
108    // Convert to u64, then to usize (explicitly)
109    let start = offsets[idx].into() as usize;
110    let end = offsets[idx + 1].into() as usize;
111    let slice = &data[start..end];
112
113    // Safety: Arrow guarantees valid UTF-8 encoding
114    let s = unsafe { std::str::from_utf8_unchecked(slice) };
115    s.to_string()
116}
117
/// Writes a horizontal table rule such as `+-----+----+` followed by a newline.
///
/// The index cell is `idx_width + 2` dashes wide and each data cell is its
/// entry in `col_widths` plus 2; the extra 2 covers the single space of
/// padding on each side of a cell's content. Cells are separated by `+`.
pub(crate) fn print_rule(
    f: &mut Formatter<'_>,
    idx_width: usize,
    col_widths: &[usize],
) -> fmt::Result {
    // Index column first; an empty string padded with '-' yields the dashes.
    write!(f, "+{:-<pad$}+", "", pad = idx_width + 2)?;
    col_widths
        .iter()
        .map(|width| width + 2)
        .try_for_each(|pad| write!(f, "{:-<pad$}+", ""))?;
    writeln!(f)
}
129
/// Writes the table header line: a centred `idx` cell followed by one
/// centred cell per header, then a newline.
///
/// Headers are paired with `col_widths` positionally; if the two slices
/// differ in length, the extra entries of the longer one are ignored.
pub(crate) fn print_header_row(
    f: &mut Formatter<'_>,
    idx_width: usize,
    headers: &[String],
    col_widths: &[usize],
) -> fmt::Result {
    write!(f, "| {:^idx_width$} |", "idx")?;
    headers
        .iter()
        .zip(col_widths)
        .try_for_each(|(title, &width)| write!(f, " {title:^width$} |"))?;
    writeln!(f)
}
142
/// Writes a table row whose every cell (index cell included) holds a
/// centred `…`, followed by a newline.
pub(crate) fn print_ellipsis_row(
    f: &mut Formatter<'_>,
    idx_width: usize,
    col_widths: &[usize],
) -> fmt::Result {
    let dots = "…";
    write!(f, "| {dots:^idx_width$} |")?;
    col_widths
        .iter()
        .try_for_each(|&width| write!(f, " {dots:^width$} |"))?;
    writeln!(f)
}
154
155/// Formats floating point numbers:
156/// - Keeps up to 6 decimal digits
157/// - Trims trailing zeroes and unnecessary decimal point
158#[inline]
159pub(crate) fn format_float<T: Float + Display>(v: T) -> String {
160    let s = format!("{:.6}", v);
161    if s.contains('.') {
162        s.trim_end_matches('0').trim_end_matches('.').to_string()
163    } else {
164        s
165    }
166}
167
/// Formats the element at `idx` of a datetime array for display.
///
/// Returns `"null"` when `arr.is_null(idx)` is true.
///
/// With the `datetime_ops` feature the raw value is interpreted according to
/// `arr.time_unit` as an offset from the Unix epoch and rendered as a UTC
/// date-time, or passed to `format_with_timezone` when `timezone` is given.
/// If the value cannot be represented as a date-time, the raw number is
/// printed with a unit suffix (`s`, `ms`, `µs`, `ns`, `d`) instead.
///
/// Without `datetime_ops` the raw number plus unit suffix is always used.
///
/// # Panics
///
/// - Without `datetime_ops`, if `timezone` is `Some`.
/// - With `datetime_ops`, if the stored value does not convert to `i64`.
#[cfg(feature = "datetime")]
pub(crate) fn format_datetime_value<T>(
    arr: &DatetimeArray<T>,
    idx: usize,
    timezone: Option<&str>,
) -> String
where
    T: Integer + std::fmt::Display,
{
    use crate::{MaskedArray, TimeUnit};

    if arr.is_null(idx) {
        return "null".into();
    }

    #[cfg(not(feature = "datetime_ops"))]
    {
        if timezone.is_some() {
            panic!(
                "Timezone functionality requires the 'datetime_ops' feature. \
                Enable it in Cargo.toml with: features = [\"datetime_ops\"]"
            );
        }
    }

    #[cfg(feature = "datetime_ops")]
    {
        use time::OffsetDateTime;

        let raw = arr.data[idx].to_i64().unwrap();
        let utc_dt = match arr.time_unit {
            TimeUnit::Seconds => OffsetDateTime::from_unix_timestamp(raw).ok(),
            TimeUnit::Milliseconds => {
                OffsetDateTime::from_unix_timestamp_nanos(i128::from(raw) * 1_000_000).ok()
            }
            TimeUnit::Microseconds => {
                OffsetDateTime::from_unix_timestamp_nanos(i128::from(raw) * 1_000).ok()
            }
            TimeUnit::Nanoseconds => {
                OffsetDateTime::from_unix_timestamp_nanos(i128::from(raw)).ok()
            }
            TimeUnit::Days => {
                use crate::structs::variants::datetime::UNIX_EPOCH_JULIAN_DAY;
                // Checked add + checked narrowing: a plain `as i32` would wrap
                // huge day counts into a valid-looking but wrong Julian day.
                // Out-of-range values now fall through to the raw fallback.
                raw.checked_add(UNIX_EPOCH_JULIAN_DAY)
                    .and_then(|jd| <i32 as std::convert::TryFrom<i64>>::try_from(jd).ok())
                    .and_then(|jd| time::Date::from_julian_day(jd).ok())
                    .and_then(|date| date.with_hms(0, 0, 0).ok())
                    .map(|dt| dt.assume_utc())
            }
        };

        if let Some(dt) = utc_dt {
            return match timezone {
                Some(tz) => format_with_timezone(dt, tz),
                None => dt.to_string(),
            };
        }
    }

    // Raw fallback: used when `datetime_ops` is disabled, or when the value
    // is outside the range representable as a date-time.
    let v = arr.data[idx];
    let suffix = match arr.time_unit {
        TimeUnit::Seconds => "s",
        TimeUnit::Milliseconds => "ms",
        TimeUnit::Microseconds => "µs",
        TimeUnit::Nanoseconds => "ns",
        TimeUnit::Days => "d",
    };
    format!("{v}{suffix}")
}
254
/// Renders `utc_dt` for the timezone `tz`, appending `tz` as a label.
///
/// When `parse_timezone_offset` resolves `tz` to a fixed UTC offset, the
/// date-time is shifted to that offset before printing. When it cannot, the
/// unshifted UTC date-time is printed and only the `tz` label is added.
#[cfg(all(feature = "datetime", feature = "datetime_ops"))]
fn format_with_timezone(utc_dt: time::OffsetDateTime, tz: &str) -> String {
    let shown = match parse_timezone_offset(tz) {
        Some(offset) => utc_dt.to_offset(offset),
        // Unresolvable zone: keep the UTC instant as-is.
        None => utc_dt,
    };
    format!("{shown} {tz}")
}
267
/// Resolves a timezone identifier to a fixed `UtcOffset`.
///
/// `tz` is first passed through the crate's `lookup_timezone`; `None` from
/// the lookup yields `None` here. The resolved text is trimmed and then
/// interpreted as one of:
/// - `UTC` or `Z` (case-insensitive) → `UtcOffset::UTC`
/// - `+HH:MM` / `-HH:MM` (colon-separated, digits only)
/// - `+HHMM` / `-HHMM` (exactly four ASCII digits)
///
/// Returns `None` for any other shape, for minutes of 60 or more, and for
/// offsets that `UtcOffset::from_whole_seconds` rejects.
///
/// NOTE(review): the original comment states the lookup handles IANA IDs,
/// abbreviations and direct offsets — confirm against `lookup_timezone`.
#[cfg(all(feature = "datetime", feature = "datetime_ops"))]
fn parse_timezone_offset(tz: &str) -> Option<time::UtcOffset> {
    use crate::structs::variants::datetime::tz::lookup_timezone;
    use time::UtcOffset;

    // Parses a non-empty run of ASCII digits. Checking the bytes first keeps
    // integer parsing from accepting an embedded sign such as "+5" or "-5".
    fn parse_digits(field: &str) -> Option<i32> {
        if field.is_empty() || !field.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        field.parse::<u8>().ok().map(i32::from)
    }

    let tz_offset = lookup_timezone(tz)?;
    let tz = tz_offset.trim();

    if tz.eq_ignore_ascii_case("UTC") || tz.eq_ignore_ascii_case("Z") {
        return Some(UtcOffset::UTC);
    }

    let (sign, rest) = if let Some(rest) = tz.strip_prefix('+') {
        (1, rest)
    } else if let Some(rest) = tz.strip_prefix('-') {
        (-1, rest)
    } else {
        return None;
    };

    let (hours_str, mins_str) = match rest.split_once(':') {
        Some(parts) => parts,
        // The ASCII check guarantees byte index 2 is a char boundary, so
        // `split_at` cannot panic on multi-byte input.
        None if rest.len() == 4 && rest.is_ascii() => rest.split_at(2),
        None => return None,
    };

    let hours = parse_digits(hours_str)?;
    let mins = parse_digits(mins_str).filter(|&m| m < 60)?;

    // Range validation of the total offset is delegated to `time`.
    UtcOffset::from_whole_seconds(sign * (hours * 3600 + mins * 60)).ok()
}