Skip to main content

nu_protocol/ast/
cell_path.rs

1use super::Expression;
2use crate::{Span, casing::Casing};
3use nu_utils::{escape_quote_string, needs_quoting};
4use serde::{Deserialize, Serialize};
5use std::{cmp::Ordering, fmt::Display, str::FromStr};
6use winnow::Parser;
7
/// One level of access of a [`CellPath`]
///
/// A member is either a string key or an integer index. Both variants carry a [`Span`]
/// and an `optional` flag; only string members additionally carry a [`Casing`].
#[derive(Debug, Clone)]
pub enum PathMember {
    /// Accessing a member by string (i.e. columns of a table or [`Record`](crate::Record))
    String {
        /// The key that is looked up
        val: String,
        /// Span associated with this member
        span: Span,
        /// If marked as optional don't throw an error if not found but perform default handling
        /// (e.g. return `Value::Nothing`)
        optional: bool,
        /// Affects column lookup
        casing: Casing,
    },
    /// Accessing a member by index (i.e. row of a table or item in a list)
    Int {
        /// The index that is looked up
        val: usize,
        /// Span associated with this member
        span: Span,
        /// If marked as optional don't throw an error if not found but perform default handling
        /// (e.g. return `Value::Nothing`)
        optional: bool,
    },
}
30
31impl PathMember {
32    pub fn int(val: usize, optional: bool, span: Span) -> Self {
33        PathMember::Int {
34            val,
35            span,
36            optional,
37        }
38    }
39
40    pub fn string(val: String, optional: bool, casing: Casing, span: Span) -> Self {
41        PathMember::String {
42            val,
43            span,
44            optional,
45            casing,
46        }
47    }
48
49    pub fn test_int(val: usize, optional: bool) -> Self {
50        PathMember::Int {
51            val,
52            optional,
53            span: Span::test_data(),
54        }
55    }
56
57    pub fn test_string(val: String, optional: bool, casing: Casing) -> Self {
58        PathMember::String {
59            val,
60            optional,
61            casing,
62            span: Span::test_data(),
63        }
64    }
65
66    pub fn make_optional(&mut self) {
67        match self {
68            PathMember::String { optional, .. } => *optional = true,
69            PathMember::Int { optional, .. } => *optional = true,
70        }
71    }
72
73    pub fn make_insensitive(&mut self) {
74        match self {
75            PathMember::String { casing, .. } => *casing = Casing::Insensitive,
76            PathMember::Int { .. } => {}
77        }
78    }
79
80    pub fn span(&self) -> Span {
81        match self {
82            PathMember::String { span, .. } => *span,
83            PathMember::Int { span, .. } => *span,
84        }
85    }
86
87    /// Returns an estimate of the memory size used by this PathMember in bytes
88    pub fn memory_size(&self) -> usize {
89        match self {
90            PathMember::String { val, .. } => std::mem::size_of::<Self>() + val.capacity(),
91            PathMember::Int { .. } => std::mem::size_of::<Self>(),
92        }
93    }
94}
95
96impl PartialEq for PathMember {
97    fn eq(&self, other: &Self) -> bool {
98        match (self, other) {
99            (
100                Self::String {
101                    val: l_val,
102                    optional: l_opt,
103                    ..
104                },
105                Self::String {
106                    val: r_val,
107                    optional: r_opt,
108                    ..
109                },
110            ) => l_val == r_val && l_opt == r_opt,
111            (
112                Self::Int {
113                    val: l_val,
114                    optional: l_opt,
115                    ..
116                },
117                Self::Int {
118                    val: r_val,
119                    optional: r_opt,
120                    ..
121                },
122            ) => l_val == r_val && l_opt == r_opt,
123            _ => false,
124        }
125    }
126}
127
128impl PartialOrd for PathMember {
129    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
130        match (self, other) {
131            (
132                PathMember::String {
133                    val: l_val,
134                    optional: l_opt,
135                    ..
136                },
137                PathMember::String {
138                    val: r_val,
139                    optional: r_opt,
140                    ..
141                },
142            ) => {
143                let val_ord = Some(l_val.cmp(r_val));
144
145                if let Some(Ordering::Equal) = val_ord {
146                    Some(l_opt.cmp(r_opt))
147                } else {
148                    val_ord
149                }
150            }
151            (
152                PathMember::Int {
153                    val: l_val,
154                    optional: l_opt,
155                    ..
156                },
157                PathMember::Int {
158                    val: r_val,
159                    optional: r_opt,
160                    ..
161                },
162            ) => {
163                let val_ord = Some(l_val.cmp(r_val));
164
165                if let Some(Ordering::Equal) = val_ord {
166                    Some(l_opt.cmp(r_opt))
167                } else {
168                    val_ord
169                }
170            }
171            (PathMember::Int { .. }, PathMember::String { .. }) => Some(Ordering::Greater),
172            (PathMember::String { .. }, PathMember::Int { .. }) => Some(Ordering::Less),
173        }
174    }
175}
176
177impl Display for PathMember {
178    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
179        match self {
180            PathMember::Int { val, optional, .. } => {
181                let question_mark = if *optional { "?" } else { "" };
182                write!(f, "{val}{question_mark}")
183            }
184            PathMember::String {
185                val,
186                optional,
187                casing,
188                ..
189            } => {
190                let question_mark = if *optional { "?" } else { "" };
191                let exclamation_mark = if *casing == Casing::Insensitive {
192                    "!"
193                } else {
194                    ""
195                };
196                let val = if needs_quoting(val) {
197                    &escape_quote_string(val)
198                } else {
199                    val
200                };
201                write!(f, "{val}{exclamation_mark}{question_mark}")
202            }
203        }
204    }
205}
206
/// Error produced when a string could not be parsed as a [`PathMember`].
#[derive(Debug, thiserror::Error)]
#[error("could not parse path member {attempted:?}")]
pub struct PathMemberParseError {
    /// The input that failed to parse, kept for the error message.
    attempted: String,
}
212
213impl FromStr for PathMember {
214    type Err = PathMemberParseError;
215
216    fn from_str(s: &str) -> Result<Self, Self::Err> {
217        parse::path_member
218            .parse(s)
219            .map_err(|_| PathMemberParseError {
220                attempted: s.to_owned(),
221            })
222    }
223}
224
225impl Serialize for PathMember {
226    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
227    where
228        S: serde::Serializer,
229    {
230        self.to_string().serialize(serializer)
231    }
232}
233
234impl<'de> Deserialize<'de> for PathMember {
235    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
236    where
237        D: serde::Deserializer<'de>,
238    {
239        let s = String::deserialize(deserializer)?;
240        Self::from_str(&s).map_err(serde::de::Error::custom)
241    }
242}
243
/// [`PathMember`] for testing purposes.
///
/// This path member may be converted via [`into_path_member`](Self::into_path_member) into a
/// [`PathMember`] that is using a [`Span::test_data()`](crate::Span::test_data) span.
///
/// `T` is the wrapped raw value; conversions are provided in this file for `String`
/// (string members) and `usize` (index members).
#[doc(hidden)]
pub struct TestPathMember<T>(T);
250
251impl<S: Into<String>> From<S> for TestPathMember<String> {
252    fn from(value: S) -> Self {
253        Self(value.into())
254    }
255}
256
257impl TestPathMember<String> {
258    pub fn into_path_member(self) -> PathMember {
259        PathMember::test_string(self.0, false, Casing::Sensitive)
260    }
261}
262
263impl From<usize> for TestPathMember<usize> {
264    fn from(value: usize) -> Self {
265        Self(value)
266    }
267}
268
269impl TestPathMember<usize> {
270    pub fn into_path_member(self) -> PathMember {
271        PathMember::test_int(self.0, false)
272    }
273}
274
/// Represents the potentially nested access to fields/cells of a container type
///
/// In our current implementation for table access the order of row/column is commutative.
/// This limits the number of possible rows to select in one [`CellPath`] to 1 as it could
/// otherwise be ambiguous
///
/// ```nushell
/// col1.0
/// 0.col1
/// col2
/// 42
/// ```
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub struct CellPath {
    /// The individual access steps, in the order they appear in the path
    pub members: Vec<PathMember>,
}
291
292impl CellPath {
293    pub fn empty() -> Self {
294        Self {
295            members: Vec::new(),
296        }
297    }
298
299    pub fn make_optional(&mut self) {
300        for member in &mut self.members {
301            member.make_optional();
302        }
303    }
304
305    pub fn make_insensitive(&mut self) {
306        for member in &mut self.members {
307            member.make_insensitive();
308        }
309    }
310
311    // Formats the cell-path as a column name, i.e. without quoting and optional markers ('?').
312    pub fn to_column_name(&self) -> String {
313        let mut s = String::new();
314
315        for member in &self.members {
316            match member {
317                PathMember::Int { val, .. } => {
318                    s += &val.to_string();
319                }
320                PathMember::String { val, .. } => {
321                    s += val;
322                }
323            }
324
325            s.push('.');
326        }
327
328        s.pop(); // Easier than checking whether to insert the '.' on every iteration.
329        s
330    }
331
332    /// Returns an estimate of the memory size used by this CellPath in bytes
333    pub fn memory_size(&self) -> usize {
334        std::mem::size_of::<Self>() + self.members.iter().map(|m| m.memory_size()).sum::<usize>()
335    }
336}
337
338impl Display for CellPath {
339    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
340        write!(f, "$")?;
341        for member in self.members.iter() {
342            write!(f, ".{member}")?;
343        }
344        // Empty cell-paths are `$.` not `$`
345        if self.members.is_empty() {
346            write!(f, ".")?;
347        }
348        Ok(())
349    }
350}
351
/// Error produced when a string could not be parsed as a [`CellPath`].
#[derive(Debug, thiserror::Error)]
#[error("could not parse cell path {attempted:?}")]
pub struct CellPathParseError {
    /// The input that failed to parse, kept for the error message.
    attempted: String,
}
357
358impl FromStr for CellPath {
359    type Err = CellPathParseError;
360
361    fn from_str(s: &str) -> Result<Self, Self::Err> {
362        parse::cell_path.parse(s).map_err(|_| CellPathParseError {
363            attempted: s.to_owned(),
364        })
365    }
366}
367
368impl Serialize for CellPath {
369    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
370    where
371        S: serde::Serializer,
372    {
373        self.to_string().serialize(serializer)
374    }
375}
376
377impl<'de> Deserialize<'de> for CellPath {
378    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
379    where
380        D: serde::Deserializer<'de>,
381    {
382        let s = String::deserialize(deserializer)?;
383        Self::from_str(&s).map_err(serde::de::Error::custom)
384    }
385}
386
/// Pairs a head [`Expression`] with a tail of [`PathMember`]s.
///
/// NOTE(review): presumably the tail is the cell path applied to the value of `head`;
/// confirm against the evaluator.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct FullCellPath {
    /// The expression the path starts from
    pub head: Expression,
    /// The path members following the head
    pub tail: Vec<PathMember>,
}
392
393mod parse {
394    use super::*;
395    use winnow::{
396        Result, Str, combinator::*, error::*, prelude::*, stream::ContainsToken, token::*,
397    };
398
399    pub fn cell_path(input: &mut &str) -> Result<CellPath> {
400        preceded(opt("$."), repeat(0.., terminated(path_member, opt('.'))))
401            .parse_next(input)
402            .map(|members| CellPath { members })
403    }
404
    /// Parses a single path member, trying an index member first and a string member second.
    ///
    /// Fails on empty input, and fails when the member is followed by anything other than
    /// a `.` or the end of the input.
    pub fn path_member(input: &mut &str) -> Result<PathMember> {
        // An unquoted string may match zero characters, so empty input has to be
        // rejected explicitly.
        if input.is_empty() {
            return Err(ParserError::from_input(input));
        }

        let member = alt((int_path_member, string_path_member)).parse_next(input)?;

        // ensure there's no more content after a member
        // (`peek` only looks ahead; the `.` itself is not consumed here)
        peek(alt((".", eof))).parse_next(input)?;

        Ok(member)
    }
417
418    fn int_path_member(input: &mut &str) -> Result<PathMember> {
419        let int = digits.parse_next(input)?;
420        let modifier = modifier.parse_next(input)?;
421        Ok(PathMember::Int {
422            val: int,
423            span: Span::unknown(),
424            optional: modifier.optional,
425        })
426    }
427
428    fn digits(input: &mut &str) -> Result<usize> {
429        let start = input.checkpoint();
430        if let Ok(prefix) = digit_prefix.parse_next(input) {
431            return match prefix {
432                DigitPrefix::Bin => bin_digits.parse_next(input),
433                DigitPrefix::Oct => oct_digits.parse_next(input),
434                DigitPrefix::Hex => hex_digits.parse_next(input),
435            };
436        }
437
438        input.reset(&start);
439        dec_digits.parse_next(input)
440    }
441
    /// Radix selected by a two-character prefix in front of an integer.
    enum DigitPrefix {
        /// Binary digits follow (radix 2)
        Bin,
        /// Octal digits follow (radix 8)
        Oct,
        /// Hexadecimal digits follow (radix 16)
        Hex,
    }
447
448    fn digit_prefix(input: &mut &str) -> Result<DigitPrefix> {
449        let prefix = take(2usize).parse_next(input)?;
450        Ok(match prefix {
451            "0b" => DigitPrefix::Bin,
452            "0o" => DigitPrefix::Oct,
453            "Ox" => DigitPrefix::Hex,
454            _ => return fail(input),
455        })
456    }
457
458    fn bin_digits(input: &mut &str) -> Result<usize> {
459        any_radix_digits(2, ('_', '0', '1')).parse_next(input)
460    }
461
462    fn oct_digits(input: &mut &str) -> Result<usize> {
463        any_radix_digits(8, ('_', '0'..='7')).parse_next(input)
464    }
465
466    fn dec_digits(input: &mut &str) -> Result<usize> {
467        any_radix_digits(10, ('_', '0'..='9')).parse_next(input)
468    }
469
470    fn hex_digits(input: &mut &str) -> Result<usize> {
471        any_radix_digits(16, ('_', '0'..='9', 'a'..='f', 'A'..='Z')).parse_next(input)
472    }
473
    /// Builds a parser for an unsigned integer in the given `radix`.
    ///
    /// The parser takes one or more characters accepted by `tokens`, removes every `_`,
    /// rejects the match if nothing is left after that, and converts the remainder with
    /// [`usize::from_str_radix`]. A conversion error makes the parser fail.
    fn any_radix_digits<'i>(
        radix: u32,
        tokens: impl ContainsToken<char>,
    ) -> impl Parser<Str<'i>, usize, ContextError> {
        take_while(1.., tokens)
            // `_` is only a visual separator and is not part of the number
            .map(|d: &str| d.replace('_', ""))
            // a match made of underscores only is not a number
            .verify(|d: &str| !d.is_empty())
            .try_map(move |d| usize::from_str_radix(&d, radix))
    }
483
484    fn string_path_member(input: &mut &str) -> Result<PathMember> {
485        let string = alt((
486            single_quoted_string,
487            bare_word_string,
488            double_quoted_string,
489            unquoted_string,
490        ))
491        .parse_next(input)?;
492
493        let modifier = modifier.parse_next(input)?;
494
495        Ok(PathMember::String {
496            val: string,
497            span: Span::unknown(),
498            optional: modifier.optional,
499            casing: match modifier.case_insensitive {
500                true => Casing::Insensitive,
501                false => Default::default(),
502            },
503        })
504    }
505
506    fn unquoted_string(input: &mut &str) -> Result<String> {
507        struct UnquotedTokens;
508
509        impl ContainsToken<char> for UnquotedTokens {
510            fn contains_token(&self, token: char) -> bool {
511                match token {
512                    // spaces and tabs
513                    ' ' | '\n' | '\t' => false,
514
515                    // syntax characters
516                    '!' | '?' | '.' => false,
517
518                    // brackets
519                    '(' | ')' => false,
520
521                    _ => true,
522                }
523            }
524        }
525
526        take_while(0.., UnquotedTokens)
527            .parse_next(input)
528            .map(|s| s.to_owned())
529    }
530
531    fn single_quoted_string(input: &mut &str) -> Result<String> {
532        delimited("'", take_while(0.., |c| c != '\''), "'")
533            .parse_next(input)
534            .map(|s| s.to_owned())
535    }
536
537    fn bare_word_string(input: &mut &str) -> Result<String> {
538        delimited("`", take_while(0.., |c| c != '`'), "`")
539            .parse_next(input)
540            .map(|s| s.to_owned())
541    }
542
    /// Parses a string delimited by double quotes, resolving backslash escapes.
    ///
    /// Recognised escapes are `\n`, `\r`, `\t`, `\\`, `\/` and `\"`. Any other character
    /// except `"` is taken as-is, including a backslash that does not start one of the
    /// recognised escapes.
    fn double_quoted_string(input: &mut &str) -> Result<String> {
        /// Parses one backslash escape and yields the character it stands for.
        fn escaped(input: &mut &str) -> Result<char> {
            preceded(
                '\\',
                alt((
                    'n'.value('\n'),
                    'r'.value('\r'),
                    't'.value('\t'),
                    '\\'.value('\\'),
                    '/'.value('/'),
                    '"'.value('"'),
                )),
            )
            .parse_next(input)
        }

        /// Parses any single character other than the closing quote.
        fn char(input: &mut &str) -> Result<char> {
            any.verify(|c| *c != '"').parse_next(input)
        }

        // Escapes are tried first so that `\"` does not end the string early.
        let content = repeat(0.., alt((escaped, char))).fold(String::new, |mut string, char| {
            string.push(char);
            string
        });

        delimited('"', content, '"').parse_next(input)
    }
570
    /// Markers that may trail a path member; both default to `false`.
    #[derive(Default)]
    struct Modifier {
        /// Set when a `?` was seen
        optional: bool,
        /// Set when a `!` was seen
        case_insensitive: bool,
    }
576
    /// Parses the `!` and `?` markers that may trail a path member.
    ///
    /// Each marker may appear at most once, in either order. A repeated marker makes the
    /// parser fail with a context describing what was expected instead. Parsing stops at
    /// the first character that is neither `!` nor `?`.
    fn modifier(input: &mut &str) -> Result<Modifier> {
        let mut modifier = Modifier::default();

        loop {
            let Some(next) = opt(alt(('!', '?'))).parse_next(input)? else {
                break;
            };

            // The first two arms accept a marker that was not seen yet; the remaining
            // arms are duplicates and only pick the text of the error message.
            let expected = match (next, modifier.optional, modifier.case_insensitive) {
                ('!', _, false) => {
                    modifier.case_insensitive = true;
                    continue;
                }
                ('?', false, _) => {
                    modifier.optional = true;
                    continue;
                }
                ('!', false, true) => "'?' or '.'",
                ('!', true, true) => "'.'",
                ('?', true, false) => "'!' or '.'",
                ('?', true, true) => "'.'",
                (c, _, _) => unreachable!("parser only returns with '!' or '?', got {c:?}"),
            };

            // `fail` always errors, so `?` returns from the function here.
            fail.context(StrContext::Expected(StrContextValue::Description(expected)))
                .parse_next(input)?
        }

        Ok(modifier)
    }
607}
608
#[cfg(test)]
mod test {
    use super::*;
    use std::cmp::Ordering::Greater;

    /// Index members sort after string members; within a variant the value decides first
    /// and the `optional` flag second.
    #[test]
    fn path_member_partial_ord() {
        let int = PathMember::test_int;
        let string = |val: &str, optional: bool| {
            PathMember::test_string(val.into(), optional, Casing::Sensitive)
        };

        // Every pair is `(left, right)` where `left` must compare greater than `right`.
        let greater_pairs = [
            (int(5, true), string("e", true)),
            (int(5, true), int(5, false)),
            (int(6, true), int(5, true)),
            (string("e", true), string("e", false)),
            (string("f", true), string("e", true)),
        ];

        for (left, right) in greater_pairs {
            assert_eq!(Some(Greater), left.partial_cmp(&right));
        }
    }
}