nu_data/base/
shape.rs

1use bigdecimal::BigDecimal;
2use chrono::{DateTime, FixedOffset};
3use indexmap::map::IndexMap;
4use nu_protocol::RangeInclusion;
5use nu_protocol::{format_primitive, ColumnPath, Dictionary, Primitive, UntaggedValue, Value};
6use nu_source::{DbgDocBldr, DebugDocBuilder, PrettyDebug, Tag};
7use num_bigint::BigInt;
8use num_format::{Locale, ToFormattedString};
9use serde::{Deserialize, Serialize};
10use std::cmp::Ordering;
11use std::fmt::Debug;
12use std::hash::{Hash, Hasher};
13use std::path::PathBuf;
14use sys_locale::get_locale;
15
16#[cfg(feature = "dataframe")]
17use nu_protocol::dataframe::{FrameStruct, NuDataFrame};
18
19#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Deserialize, Serialize)]
20pub struct InlineRange {
21    from: (InlineShape, RangeInclusion),
22    to: (InlineShape, RangeInclusion),
23}
24
25#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Deserialize, Serialize)]
26pub enum InlineShape {
27    Nothing,
28    Int(i64),
29    BigInt(BigInt),
30    Decimal(BigDecimal),
31    Range(Box<InlineRange>),
32    Bytesize(u64),
33    String(String),
34    Line(String),
35    ColumnPath(ColumnPath),
36    GlobPattern(String),
37    Boolean(bool),
38    Date(DateTime<FixedOffset>),
39    Duration(BigInt),
40    FilePath(PathBuf),
41    Binary(usize),
42
43    Row(Row),
44    Table(Vec<InlineShape>),
45
46    // TODO: Block arguments
47    Block,
48    // TODO: Error type
49    Error,
50
51    #[cfg(feature = "dataframe")]
52    DataFrame(String),
53
54    #[cfg(feature = "dataframe")]
55    FrameStruct(String),
56
57    // Stream markers (used as bookend markers rather than actual values)
58    BeginningOfStream,
59    EndOfStream,
60}
61
62pub struct FormatInlineShape {
63    shape: InlineShape,
64    column: Option<Column>,
65}
66
67pub fn get_config_filesize_metric() -> bool {
68    let res = crate::config::config(Tag::unknown());
69    if res.is_err() {
70        return true;
71    }
72    let value = res
73        .unwrap_or_default()
74        .get("filesize_metric")
75        .map(|s| s.value.is_true())
76        .unwrap_or(true);
77    value
78}
79
80impl InlineShape {
81    pub fn from_primitive(primitive: &Primitive) -> InlineShape {
82        match primitive {
83            Primitive::Nothing => InlineShape::Nothing,
84            Primitive::Int(int) => InlineShape::Int(*int),
85            Primitive::BigInt(int) => InlineShape::BigInt(int.clone()),
86            Primitive::Range(range) => {
87                let (left, left_inclusion) = &range.from;
88                let (right, right_inclusion) = &range.to;
89
90                InlineShape::Range(Box::new(InlineRange {
91                    from: (InlineShape::from_primitive(left), *left_inclusion),
92                    to: (InlineShape::from_primitive(right), *right_inclusion),
93                }))
94            }
95            Primitive::Decimal(decimal) => InlineShape::Decimal(decimal.clone()),
96            Primitive::Filesize(bytesize) => InlineShape::Bytesize(*bytesize),
97            Primitive::String(string) => InlineShape::String(string.clone()),
98            Primitive::ColumnPath(path) => InlineShape::ColumnPath(path.clone()),
99            Primitive::GlobPattern(pattern) => InlineShape::GlobPattern(pattern.clone()),
100            Primitive::Boolean(boolean) => InlineShape::Boolean(*boolean),
101            Primitive::Date(date) => InlineShape::Date(*date),
102            Primitive::Duration(duration) => InlineShape::Duration(duration.clone()),
103            Primitive::FilePath(path) => InlineShape::FilePath(path.clone()),
104            Primitive::Binary(b) => InlineShape::Binary(b.len()),
105            Primitive::BeginningOfStream => InlineShape::BeginningOfStream,
106            Primitive::EndOfStream => InlineShape::EndOfStream,
107        }
108    }
109
110    pub fn from_dictionary(dictionary: &Dictionary) -> InlineShape {
111        let mut map = IndexMap::new();
112
113        for (key, value) in &dictionary.entries {
114            let column = Column::String(key.clone());
115            map.insert(column, InlineShape::from_value(value));
116        }
117
118        InlineShape::Row(Row { map })
119    }
120
121    pub fn from_table<'a>(table: impl IntoIterator<Item = &'a Value>) -> InlineShape {
122        let vec = table.into_iter().map(InlineShape::from_value).collect();
123
124        InlineShape::Table(vec)
125    }
126
127    #[cfg(feature = "dataframe")]
128    pub fn from_df(df: &NuDataFrame) -> InlineShape {
129        let msg = format!("{} rows {} cols", df.as_ref().height(), df.as_ref().width());
130
131        InlineShape::DataFrame(msg)
132    }
133
134    #[cfg(feature = "dataframe")]
135    pub fn from_frame_struct(s: &FrameStruct) -> InlineShape {
136        match s {
137            FrameStruct::GroupBy(groupby) => {
138                let msg = groupby.by().join(",");
139                let msg = format!("groupby {}", msg);
140                InlineShape::DataFrame(msg)
141            }
142        }
143    }
144
145    pub fn from_value<'a>(value: impl Into<&'a UntaggedValue>) -> InlineShape {
146        match value.into() {
147            UntaggedValue::Primitive(p) => InlineShape::from_primitive(p),
148            UntaggedValue::Row(row) => InlineShape::from_dictionary(row),
149            UntaggedValue::Table(table) => InlineShape::from_table(table),
150            UntaggedValue::Error(_) => InlineShape::Error,
151            UntaggedValue::Block(_) => InlineShape::Block,
152            #[cfg(feature = "dataframe")]
153            UntaggedValue::DataFrame(df) => InlineShape::from_df(df),
154            #[cfg(feature = "dataframe")]
155            UntaggedValue::FrameStruct(s) => InlineShape::from_frame_struct(s),
156        }
157    }
158
159    #[allow(unused)]
160    pub fn format_for_column(self, column: impl Into<Column>) -> FormatInlineShape {
161        FormatInlineShape {
162            shape: self,
163            column: Some(column.into()),
164        }
165    }
166
167    pub fn format(self) -> FormatInlineShape {
168        FormatInlineShape {
169            shape: self,
170            column: None,
171        }
172    }
173
174    pub fn format_bytes(bytesize: u64, forced_format: Option<&str>) -> (DbgDocBldr, String) {
175        use bigdecimal::ToPrimitive;
176
177        // get the config value, if it doesn't exist make it 'auto' so it works how it originally did
178        let filesize_format_var;
179        if let Some(fmt) = forced_format {
180            filesize_format_var = fmt.to_ascii_lowercase();
181        } else {
182            filesize_format_var = match crate::config::config(Tag::unknown()) {
183                Ok(cfg) => cfg
184                    .get("filesize_format")
185                    .map(|val| val.convert_to_string().to_ascii_lowercase())
186                    .unwrap_or_else(|| "auto".to_string()),
187                _ => "auto".to_string(),
188            }
189        }
190
191        // if there is a value, match it to one of the valid values for byte units
192        let filesize_format = match filesize_format_var.as_str() {
193            "b" => (byte_unit::ByteUnit::B, ""),
194            "kb" => (byte_unit::ByteUnit::KB, ""),
195            "kib" => (byte_unit::ByteUnit::KiB, ""),
196            "mb" => (byte_unit::ByteUnit::MB, ""),
197            "mib" => (byte_unit::ByteUnit::MiB, ""),
198            "gb" => (byte_unit::ByteUnit::GB, ""),
199            "gib" => (byte_unit::ByteUnit::GiB, ""),
200            "tb" => (byte_unit::ByteUnit::TB, ""),
201            "tib" => (byte_unit::ByteUnit::TiB, ""),
202            "pb" => (byte_unit::ByteUnit::PB, ""),
203            "pib" => (byte_unit::ByteUnit::PiB, ""),
204            "eb" => (byte_unit::ByteUnit::EB, ""),
205            "eib" => (byte_unit::ByteUnit::EiB, ""),
206            "zb" => (byte_unit::ByteUnit::ZB, ""),
207            "zib" => (byte_unit::ByteUnit::ZiB, ""),
208            _ => (byte_unit::ByteUnit::B, "auto"),
209        };
210
211        if let Some(value) = bytesize.to_u128() {
212            let byte = byte_unit::Byte::from_bytes(value);
213            let adj_byte =
214                if filesize_format.0 == byte_unit::ByteUnit::B && filesize_format.1 == "auto" {
215                    byte.get_appropriate_unit(!get_config_filesize_metric())
216                } else {
217                    byte.get_adjusted_unit(filesize_format.0)
218                };
219
220            match adj_byte.get_unit() {
221                byte_unit::ByteUnit::B => {
222                    let locale_string = get_locale().unwrap_or_else(|| String::from("en-US"));
223                    // Since get_locale() and Locale::from_name() don't always return the same items
224                    // we need to try and parse it to match. For instance, a valid locale is de_DE
225                    // however Locale::from_name() wants only de so we split and parse it out.
226                    let locale_string = locale_string.replace("_", "-"); // en_AU -> en-AU
227                    let locale = match Locale::from_name(&locale_string) {
228                        Ok(loc) => loc,
229                        _ => {
230                            let all = num_format::Locale::available_names();
231                            let locale_prefix = &locale_string.split('-').collect::<Vec<&str>>();
232                            if all.contains(&locale_prefix[0]) {
233                                // eprintln!("Found alternate: {}", &locale_prefix[0]);
234                                Locale::from_name(locale_prefix[0]).unwrap_or(Locale::en)
235                            } else {
236                                // eprintln!("Unable to find matching locale. Defaulting to en-US");
237                                Locale::en
238                            }
239                        }
240                    };
241                    let locale_byte = adj_byte.get_value() as u64;
242                    let locale_byte_string = locale_byte.to_formatted_string(&locale);
243                    if filesize_format.1 == "auto" {
244                        let doc = (DbgDocBldr::primitive(locale_byte_string)
245                            + DbgDocBldr::space()
246                            + DbgDocBldr::kind("B"))
247                        .group();
248                        (doc.clone(), InlineShape::render_doc(&doc))
249                    } else {
250                        let doc = (DbgDocBldr::primitive(locale_byte_string)).group();
251                        (doc.clone(), InlineShape::render_doc(&doc))
252                    }
253                }
254                _ => {
255                    let doc = DbgDocBldr::primitive(adj_byte.format(1));
256                    (doc.clone(), InlineShape::render_doc(&doc))
257                }
258            }
259        } else {
260            let doc =
261                (DbgDocBldr::primitive(bytesize) + DbgDocBldr::space() + DbgDocBldr::kind("B"))
262                    .group();
263            (doc.clone(), InlineShape::render_doc(&doc))
264        }
265    }
266
267    pub fn render_doc(doc: &DebugDocBuilder) -> String {
268        let mut w = Vec::new();
269        doc.to_doc()
270            .render(1000, &mut w)
271            .expect("Error rendering bytes");
272        String::from_utf8_lossy(&w).to_string()
273    }
274}
275
276impl PrettyDebug for FormatInlineShape {
277    fn pretty(&self) -> DebugDocBuilder {
278        let column = &self.column;
279
280        match &self.shape {
281            InlineShape::Nothing => DbgDocBldr::blank(),
282            InlineShape::Int(int) => DbgDocBldr::primitive(int),
283            InlineShape::BigInt(int) => DbgDocBldr::primitive(int),
284            InlineShape::Decimal(decimal) => DbgDocBldr::description(format_primitive(
285                &Primitive::Decimal(decimal.clone()),
286                None,
287            )),
288            InlineShape::Range(range) => {
289                let (left, left_inclusion) = &range.from;
290                let (right, right_inclusion) = &range.to;
291
292                let op = match (left_inclusion, right_inclusion) {
293                    (RangeInclusion::Inclusive, RangeInclusion::Inclusive) => "..",
294                    (RangeInclusion::Inclusive, RangeInclusion::Exclusive) => "..<",
295                    _ => unimplemented!(
296                        "No syntax for ranges that aren't inclusive on the left and exclusive \
297                         or inclusive on the right"
298                    ),
299                };
300
301                left.clone().format().pretty()
302                    + DbgDocBldr::operator(op)
303                    + right.clone().format().pretty()
304            }
305            InlineShape::Bytesize(bytesize) => {
306                let bytes = InlineShape::format_bytes(*bytesize, None);
307                bytes.0
308            }
309            InlineShape::String(string) => DbgDocBldr::primitive(string),
310            InlineShape::Line(string) => DbgDocBldr::primitive(string),
311            InlineShape::ColumnPath(path) => DbgDocBldr::intersperse(
312                path.iter().map(|member| member.pretty()),
313                DbgDocBldr::keyword("."),
314            ),
315            InlineShape::GlobPattern(pattern) => DbgDocBldr::primitive(pattern),
316            InlineShape::Boolean(boolean) => DbgDocBldr::primitive(
317                match (boolean, column) {
318                    (true, None) => "true",
319                    (false, None) => "false",
320                    (true, Some(Column::String(s))) if !s.is_empty() => s,
321                    (false, Some(Column::String(s))) if !s.is_empty() => "",
322                    (true, Some(_)) => "true",
323                    (false, Some(_)) => "false",
324                }
325                .to_owned(),
326            ),
327            InlineShape::Date(date) => DbgDocBldr::primitive(nu_protocol::format_date(date)),
328            InlineShape::Duration(duration) => DbgDocBldr::description(format_primitive(
329                &Primitive::Duration(duration.clone()),
330                None,
331            )),
332            InlineShape::FilePath(path) => DbgDocBldr::primitive(path.display()),
333            InlineShape::Binary(length) => {
334                DbgDocBldr::opaque(format!("<binary: {} bytes>", length))
335            }
336            InlineShape::Row(row) => DbgDocBldr::delimit(
337                "[",
338                DbgDocBldr::kind("row")
339                    + DbgDocBldr::space()
340                    + if row.map.keys().len() <= 6 {
341                        DbgDocBldr::intersperse(
342                            row.map.keys().map(|key| match key {
343                                Column::String(string) => DbgDocBldr::description(string),
344                                Column::Value => DbgDocBldr::blank(),
345                            }),
346                            DbgDocBldr::space(),
347                        )
348                    } else {
349                        DbgDocBldr::description(format!("{} columns", row.map.keys().len()))
350                    },
351                "]",
352            )
353            .group(),
354            InlineShape::Table(rows) => DbgDocBldr::delimit(
355                "[",
356                DbgDocBldr::kind("table")
357                    + DbgDocBldr::space()
358                    + DbgDocBldr::primitive(rows.len())
359                    + DbgDocBldr::space()
360                    + DbgDocBldr::description("rows"),
361                "]",
362            )
363            .group(),
364            InlineShape::Block => DbgDocBldr::opaque("block"),
365            InlineShape::Error => DbgDocBldr::error("error"),
366            #[cfg(feature = "dataframe")]
367            InlineShape::DataFrame(msg) => DbgDocBldr::delimit(
368                "[",
369                DbgDocBldr::kind("dataframe") + DbgDocBldr::space() + DbgDocBldr::primitive(msg),
370                "]",
371            )
372            .group(),
373            #[cfg(feature = "dataframe")]
374            InlineShape::FrameStruct(msg) => {
375                DbgDocBldr::delimit("[", DbgDocBldr::primitive(msg), "]").group()
376            }
377            InlineShape::BeginningOfStream => DbgDocBldr::blank(),
378            InlineShape::EndOfStream => DbgDocBldr::blank(),
379        }
380    }
381}
382
/// A collection that is built up by merging items into it one at a time.
pub trait GroupedValue: Debug + Clone {
    /// The kind of item that can be merged into the collection.
    type Item;

    /// Creates an empty collection.
    fn new() -> Self;

    /// Merges one more item into the collection.
    fn merge(&mut self, value: Self::Item);
}

/// Groups indices into inclusive `(start, end)` runs of consecutive values.
impl GroupedValue for Vec<(usize, usize)> {
    type Item = usize;

    fn new() -> Vec<(usize, usize)> {
        vec![]
    }

    /// Extends the last run when `new_value` directly follows its end;
    /// otherwise starts a new single-element run.
    fn merge(&mut self, new_value: usize) {
        match self.last_mut() {
            // `checked_add` keeps the adjacency test free of the underflow that
            // `new_value - 1` hits when `new_value` is 0.
            Some(run) if run.1.checked_add(1) == Some(new_value) => {
                run.1 = new_value;
            }

            _ => self.push((new_value, new_value)),
        }
    }
}
407
408#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Deserialize, Serialize)]
409pub enum Column {
410    String(String),
411    Value,
412}
413
414impl From<String> for Column {
415    fn from(x: String) -> Self {
416        Column::String(x)
417    }
418}
419
420impl From<&String> for Column {
421    fn from(x: &String) -> Self {
422        Column::String(x.clone())
423    }
424}
425
426impl From<&str> for Column {
427    fn from(x: &str) -> Self {
428        Column::String(x.to_string())
429    }
430}
431
432/// A shape representation of the type of a row
433#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
434pub struct Row {
435    map: IndexMap<Column, InlineShape>,
436}
437
438#[allow(clippy::derive_hash_xor_eq)]
439impl Hash for Row {
440    fn hash<H: Hasher>(&self, state: &mut H) {
441        let mut entries = self.map.clone();
442        entries.sort_keys();
443        entries.keys().collect::<Vec<&Column>>().hash(state);
444        entries.values().collect::<Vec<&InlineShape>>().hash(state);
445    }
446}
447
448impl PartialOrd for Row {
449    fn partial_cmp(&self, other: &Row) -> Option<Ordering> {
450        let this: Vec<&Column> = self.map.keys().collect();
451        let that: Vec<&Column> = other.map.keys().collect();
452
453        if this != that {
454            return this.partial_cmp(&that);
455        }
456
457        let this: Vec<&InlineShape> = self.map.values().collect();
458        let that: Vec<&InlineShape> = self.map.values().collect();
459
460        this.partial_cmp(&that)
461    }
462}
463
464impl Ord for Row {
465    /// Compare two dictionaries for ordering
466    fn cmp(&self, other: &Row) -> Ordering {
467        let this: Vec<&Column> = self.map.keys().collect();
468        let that: Vec<&Column> = other.map.keys().collect();
469
470        if this != that {
471            return this.cmp(&that);
472        }
473
474        let this: Vec<&InlineShape> = self.map.values().collect();
475        let that: Vec<&InlineShape> = self.map.values().collect();
476
477        this.cmp(&that)
478    }
479}