nu_command/strings/str_/
substring.rs

1use std::ops::Bound;
2
3use crate::{grapheme_flags, grapheme_flags_const};
4use nu_cmd_base::input_handler::{CmdArgument, operate};
5use nu_engine::command_prelude::*;
6use nu_protocol::{IntRange, engine::StateWorkingSet};
7use unicode_segmentation::UnicodeSegmentation;
8
/// Command type behind `str substring`; it carries no state of its own.
#[derive(Clone)]
pub struct StrSubstring;
11
12struct Arguments {
13    range: IntRange,
14    cell_paths: Option<Vec<CellPath>>,
15    graphemes: bool,
16}
17
18impl CmdArgument for Arguments {
19    fn take_cell_paths(&mut self) -> Option<Vec<CellPath>> {
20        self.cell_paths.take()
21    }
22}
23
24impl Command for StrSubstring {
25    fn name(&self) -> &str {
26        "str substring"
27    }
28
29    fn signature(&self) -> Signature {
30        Signature::build("str substring")
31            .input_output_types(vec![
32                (Type::String, Type::String),
33                (Type::List(Box::new(Type::String)), Type::List(Box::new(Type::String))),
34                (Type::table(), Type::table()),
35                (Type::record(), Type::record()),
36            ])
37            .allow_variants_without_examples(true)
38            .switch(
39                "grapheme-clusters",
40                "count indexes and split using grapheme clusters (all visible chars have length 1)",
41                Some('g'),
42            )
43            .switch(
44                "utf-8-bytes",
45                "count indexes and split using UTF-8 bytes (default; non-ASCII chars have length 2+)",
46                Some('b'),
47            )
48            .required(
49                "range",
50                SyntaxShape::Any,
51                "The indexes to substring [start end].",
52            )
53            .rest(
54                "rest",
55                SyntaxShape::CellPath,
56                "For a data structure input, turn strings at the given cell paths into substrings.",
57            )
58            .category(Category::Strings)
59    }
60
61    fn description(&self) -> &str {
62        "Get part of a string. Note that the first character of a string is index 0."
63    }
64
65    fn search_terms(&self) -> Vec<&str> {
66        vec!["slice"]
67    }
68
69    fn is_const(&self) -> bool {
70        true
71    }
72
73    fn run(
74        &self,
75        engine_state: &EngineState,
76        stack: &mut Stack,
77        call: &Call,
78        input: PipelineData,
79    ) -> Result<PipelineData, ShellError> {
80        let range: IntRange = call.req(engine_state, stack, 0)?;
81
82        let cell_paths: Vec<CellPath> = call.rest(engine_state, stack, 1)?;
83        let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths);
84        let args = Arguments {
85            range,
86            cell_paths,
87            graphemes: grapheme_flags(engine_state, stack, call)?,
88        };
89        operate(action, args, input, call.head, engine_state.signals())
90    }
91
92    fn run_const(
93        &self,
94        working_set: &StateWorkingSet,
95        call: &Call,
96        input: PipelineData,
97    ) -> Result<PipelineData, ShellError> {
98        let range: IntRange = call.req_const(working_set, 0)?;
99
100        let cell_paths: Vec<CellPath> = call.rest_const(working_set, 1)?;
101        let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths);
102        let args = Arguments {
103            range,
104            cell_paths,
105            graphemes: grapheme_flags_const(working_set, call)?,
106        };
107        operate(
108            action,
109            args,
110            input,
111            call.head,
112            working_set.permanent().signals(),
113        )
114    }
115
116    fn examples(&self) -> Vec<Example> {
117        vec![
118            Example {
119                description: "Get a substring \"nushell\" from the text \"good nushell\" using a range",
120                example: " 'good nushell' | str substring 5..11",
121                result: Some(Value::test_string("nushell")),
122            },
123            Example {
124                description: "Count indexes and split using grapheme clusters",
125                example: " '๐Ÿ‡ฏ๐Ÿ‡ตใปใ’ ใตใŒ ใดใ‚ˆ' | str substring --grapheme-clusters 4..5",
126                result: Some(Value::test_string("ใตใŒ")),
127            },
128            Example {
129                description: "sub string by negative index",
130                example: " 'good nushell' | str substring 5..-2",
131                result: Some(Value::test_string("nushel")),
132            },
133        ]
134    }
135}
136
137fn action(input: &Value, args: &Arguments, head: Span) -> Value {
138    match input {
139        Value::String { val: s, .. } => {
140            let s = if args.graphemes {
141                let indices = s
142                    .grapheme_indices(true)
143                    .map(|(idx, s)| (idx, s.len()))
144                    .collect::<Vec<_>>();
145
146                let (idx_start, idx_end) = args.range.absolute_bounds(indices.len());
147                let idx_range = match idx_end {
148                    Bound::Excluded(end) => &indices[idx_start..end],
149                    Bound::Included(end) => &indices[idx_start..=end],
150                    Bound::Unbounded => &indices[idx_start..],
151                };
152
153                if let Some((start, end)) = idx_range.first().zip(idx_range.last()) {
154                    let start = start.0;
155                    let end = end.0 + end.1;
156                    s[start..end].to_owned()
157                } else {
158                    String::new()
159                }
160            } else {
161                let (start, end) = args.range.absolute_bounds(s.len());
162                let s = match end {
163                    Bound::Excluded(end) => &s.as_bytes()[start..end],
164                    Bound::Included(end) => &s.as_bytes()[start..=end],
165                    Bound::Unbounded => &s.as_bytes()[start..],
166                };
167                String::from_utf8_lossy(s).into_owned()
168            };
169            Value::string(s, head)
170        }
171        // Propagate errors by explicitly matching them before the final case.
172        Value::Error { .. } => input.clone(),
173        other => Value::error(
174            ShellError::UnsupportedInput {
175                msg: "Only string values are supported".into(),
176                input: format!("input type: {:?}", other.get_type()),
177                msg_span: head,
178                input_span: other.span(),
179            },
180            head,
181        ),
182    }
183}
184
#[cfg(test)]
// The cases below deliberately build ranges whose start lies past their end (e.g. `6..=0`)
// to check that such ranges produce an empty string.
#[allow(clippy::reversed_empty_ranges)]
mod tests {
    use nu_protocol::IntRange;

    use super::{Arguments, Span, StrSubstring, Value, action};

    /// Runs every example declared by the command and checks its stated result.
    #[test]
    fn test_examples() {
        use crate::test_examples;

        test_examples(StrSubstring {})
    }

    /// Plain-data description of a range, convertible from std ranges and into [`IntRange`].
    #[derive(Clone, Copy, Debug)]
    struct RangeHelper {
        start: i64,
        end: i64,
        inclusion: nu_protocol::ast::RangeInclusion,
    }

    /// A range together with the substring it is expected to select.
    #[derive(Debug)]
    struct Expectation<'a> {
        range: RangeHelper,
        expected: &'a str,
    }

    impl From<std::ops::RangeInclusive<i64>> for RangeHelper {
        fn from(value: std::ops::RangeInclusive<i64>) -> Self {
            RangeHelper {
                start: *value.start(),
                end: *value.end(),
                inclusion: nu_protocol::ast::RangeInclusion::Inclusive,
            }
        }
    }

    impl From<std::ops::Range<i64>> for RangeHelper {
        fn from(value: std::ops::Range<i64>) -> Self {
            RangeHelper {
                start: value.start,
                end: value.end,
                inclusion: nu_protocol::ast::RangeInclusion::RightExclusive,
            }
        }
    }

    impl From<RangeHelper> for IntRange {
        /// Builds the range with a "next" value one step from `start` in the direction of
        /// `end`; panics with the helper and the error if construction fails.
        fn from(value: RangeHelper) -> Self {
            match IntRange::new(
                Value::test_int(value.start),
                Value::test_int(value.start + (if value.start <= value.end { 1 } else { -1 })),
                Value::test_int(value.end),
                value.inclusion,
                Span::test_data(),
            ) {
                Ok(val) => val,
                Err(e) => {
                    panic!("{value:?}: {e:?}")
                }
            }
        }
    }

    impl Expectation<'_> {
        fn range(&self) -> IntRange {
            self.range.into()
        }
    }

    /// Pairs the expected substring `word` with the range that should produce it.
    fn expectation(word: &str, range: impl Into<RangeHelper>) -> Expectation<'_> {
        Expectation {
            range: range.into(),
            expected: word,
        }
    }

    #[test]
    fn substrings_indexes() {
        let word = Value::test_string("andres");

        let cases = [
            expectation("", 0..0),
            expectation("a", 0..=0),
            expectation("an", 0..=1),
            expectation("and", 0..=2),
            expectation("andr", 0..=3),
            expectation("andre", 0..=4),
            expectation("andres", 0..=5),
            expectation("andres", 0..=6),
            expectation("a", 0..=-6),
            expectation("an", 0..=-5),
            expectation("and", 0..=-4),
            expectation("andr", 0..=-3),
            expectation("andre", 0..=-2),
            expectation("andres", 0..=-1),
            // Negative start with an end far past the string: selects the last four bytes.
            expectation("dres", -4..=i64::MAX),
            expectation("", 0..=-110),
            expectation("", 6..=0),
            expectation("", 6..=-1),
            expectation("", 6..=-2),
            expectation("", 6..=-3),
            expectation("", 6..=-4),
            expectation("", 6..=-5),
            expectation("", 6..=-6),
        ];

        for expectation in &cases {
            // Printed so a failing case can be identified from the captured test output.
            println!("{expectation:?}");
            let expected = expectation.expected;
            let actual = action(
                &word,
                &Arguments {
                    range: expectation.range(),
                    cell_paths: None,
                    graphemes: false,
                },
                Span::test_data(),
            );

            assert_eq!(actual, Value::test_string(expected));
        }
    }

    #[test]
    fn use_utf8_bytes() {
        let word = Value::string(String::from("🇯🇵ほげ ふが ぴよ"), Span::test_data());

        let range: RangeHelper = (4..=5).into();
        let options = Arguments {
            cell_paths: None,
            range: range.into(),
            graphemes: false,
        };

        // Bytes 4..=5 are the first two bytes of the second regional-indicator code point:
        // a truncated 4-byte sequence, which lossy decoding reports as a single U+FFFD.
        let actual = action(&word, &options, Span::test_data());
        assert_eq!(actual, Value::test_string("\u{FFFD}"));
    }
}
325}