nu_command/strings/str_/
substring.rs

1use std::ops::Bound;
2
3use crate::{grapheme_flags, grapheme_flags_const};
4use nu_cmd_base::input_handler::{CmdArgument, operate};
5use nu_engine::command_prelude::*;
6use nu_protocol::{IntRange, engine::StateWorkingSet};
7use unicode_segmentation::UnicodeSegmentation;
8
/// Command type backing `str substring`; its behavior is provided by the
/// `Command` implementation in this file.
#[derive(Clone)]
pub struct StrSubstring;
11
/// Per-invocation options handed to `action` for every value it processes.
struct Arguments {
    /// Range of indexes selecting the part of the string to keep.
    range: IntRange,
    /// Cell paths whose strings should be replaced by their substrings;
    /// `None` when the caller supplied no cell paths.
    cell_paths: Option<Vec<CellPath>>,
    /// When `true`, indexes count grapheme clusters; otherwise they count
    /// UTF-8 bytes.
    graphemes: bool,
}
17
18impl CmdArgument for Arguments {
19    fn take_cell_paths(&mut self) -> Option<Vec<CellPath>> {
20        self.cell_paths.take()
21    }
22}
23
24impl Command for StrSubstring {
25    fn name(&self) -> &str {
26        "str substring"
27    }
28
29    fn signature(&self) -> Signature {
30        Signature::build("str substring")
31            .input_output_types(vec![
32                (Type::String, Type::String),
33                (Type::List(Box::new(Type::String)), Type::List(Box::new(Type::String))),
34                (Type::table(), Type::table()),
35                (Type::record(), Type::record()),
36            ])
37            .allow_variants_without_examples(true)
38            .switch(
39                "grapheme-clusters",
40                "count indexes and split using grapheme clusters (all visible chars have length 1)",
41                Some('g'),
42            )
43            .switch(
44                "utf-8-bytes",
45                "count indexes and split using UTF-8 bytes (default; non-ASCII chars have length 2+)",
46                Some('b'),
47            )
48            .required(
49                "range",
50                SyntaxShape::Any,
51                "The indexes to substring [start end].",
52            )
53            .rest(
54                "rest",
55                SyntaxShape::CellPath,
56                "For a data structure input, turn strings at the given cell paths into substrings.",
57            )
58            .category(Category::Strings)
59    }
60
61    fn description(&self) -> &str {
62        "Get part of a string. Note that the first character of a string is index 0."
63    }
64
65    fn search_terms(&self) -> Vec<&str> {
66        vec!["slice"]
67    }
68
69    fn is_const(&self) -> bool {
70        true
71    }
72
73    fn run(
74        &self,
75        engine_state: &EngineState,
76        stack: &mut Stack,
77        call: &Call,
78        input: PipelineData,
79    ) -> Result<PipelineData, ShellError> {
80        let range: IntRange = call.req(engine_state, stack, 0)?;
81
82        let cell_paths: Vec<CellPath> = call.rest(engine_state, stack, 1)?;
83        let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths);
84        let args = Arguments {
85            range,
86            cell_paths,
87            graphemes: grapheme_flags(engine_state, stack, call)?,
88        };
89        operate(action, args, input, call.head, engine_state.signals()).map(|pipeline| {
90            // a substring of text/json is not necessarily text/json itself
91            let metadata = pipeline.metadata().map(|m| m.with_content_type(None));
92            pipeline.set_metadata(metadata)
93        })
94    }
95
96    fn run_const(
97        &self,
98        working_set: &StateWorkingSet,
99        call: &Call,
100        input: PipelineData,
101    ) -> Result<PipelineData, ShellError> {
102        let range: IntRange = call.req_const(working_set, 0)?;
103
104        let cell_paths: Vec<CellPath> = call.rest_const(working_set, 1)?;
105        let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths);
106        let args = Arguments {
107            range,
108            cell_paths,
109            graphemes: grapheme_flags_const(working_set, call)?,
110        };
111        operate(
112            action,
113            args,
114            input,
115            call.head,
116            working_set.permanent().signals(),
117        )
118        .map(|pipeline| {
119            // a substring of text/json is not necessarily text/json itself
120            let metadata = pipeline.metadata().map(|m| m.with_content_type(None));
121            pipeline.set_metadata(metadata)
122        })
123    }
124
125    fn examples(&self) -> Vec<Example<'_>> {
126        vec![
127            Example {
128                description: "Get a substring \"nushell\" from the text \"good nushell\" using a range",
129                example: " 'good nushell' | str substring 5..11",
130                result: Some(Value::test_string("nushell")),
131            },
132            Example {
133                description: "Count indexes and split using grapheme clusters",
134                example: " '๐Ÿ‡ฏ๐Ÿ‡ตใปใ’ ใตใŒ ใดใ‚ˆ' | str substring --grapheme-clusters 4..5",
135                result: Some(Value::test_string("ใตใŒ")),
136            },
137            Example {
138                description: "sub string by negative index",
139                example: " 'good nushell' | str substring 5..-2",
140                result: Some(Value::test_string("nushel")),
141            },
142        ]
143    }
144}
145
146fn action(input: &Value, args: &Arguments, head: Span) -> Value {
147    match input {
148        Value::String { val: s, .. } => {
149            let s = if args.graphemes {
150                let indices = s
151                    .grapheme_indices(true)
152                    .map(|(idx, s)| (idx, s.len()))
153                    .collect::<Vec<_>>();
154
155                let (idx_start, idx_end) = args.range.absolute_bounds(indices.len());
156                let idx_range = match idx_end {
157                    Bound::Excluded(end) => &indices[idx_start..end],
158                    Bound::Included(end) => &indices[idx_start..=end],
159                    Bound::Unbounded => &indices[idx_start..],
160                };
161
162                if let Some((start, end)) = idx_range.first().zip(idx_range.last()) {
163                    let start = start.0;
164                    let end = end.0 + end.1;
165                    s[start..end].to_owned()
166                } else {
167                    String::new()
168                }
169            } else {
170                let (start, end) = args.range.absolute_bounds(s.len());
171                let s = match end {
172                    Bound::Excluded(end) => &s.as_bytes()[start..end],
173                    Bound::Included(end) => &s.as_bytes()[start..=end],
174                    Bound::Unbounded => &s.as_bytes()[start..],
175                };
176                String::from_utf8_lossy(s).into_owned()
177            };
178            Value::string(s, head)
179        }
180        // Propagate errors by explicitly matching them before the final case.
181        Value::Error { .. } => input.clone(),
182        other => Value::error(
183            ShellError::UnsupportedInput {
184                msg: "Only string values are supported".into(),
185                input: format!("input type: {:?}", other.get_type()),
186                msg_span: head,
187                input_span: other.span(),
188            },
189            head,
190        ),
191    }
192}
193
194#[cfg(test)]
195#[allow(clippy::reversed_empty_ranges)]
196mod tests {
197    use nu_protocol::IntRange;
198
199    use super::{Arguments, Span, StrSubstring, Value, action};
200
201    #[test]
202    fn test_examples() {
203        use crate::test_examples;
204
205        test_examples(StrSubstring {})
206    }
207
208    #[derive(Clone, Copy, Debug)]
209    struct RangeHelper {
210        start: i64,
211        end: i64,
212        inclusion: nu_protocol::ast::RangeInclusion,
213    }
214
215    #[derive(Debug)]
216    struct Expectation<'a> {
217        range: RangeHelper,
218        expected: &'a str,
219    }
220
221    impl From<std::ops::RangeInclusive<i64>> for RangeHelper {
222        fn from(value: std::ops::RangeInclusive<i64>) -> Self {
223            RangeHelper {
224                start: *value.start(),
225                end: *value.end(),
226                inclusion: nu_protocol::ast::RangeInclusion::Inclusive,
227            }
228        }
229    }
230
231    impl From<std::ops::Range<i64>> for RangeHelper {
232        fn from(value: std::ops::Range<i64>) -> Self {
233            RangeHelper {
234                start: value.start,
235                end: value.end,
236                inclusion: nu_protocol::ast::RangeInclusion::RightExclusive,
237            }
238        }
239    }
240
241    impl From<RangeHelper> for IntRange {
242        fn from(value: RangeHelper) -> Self {
243            match IntRange::new(
244                Value::test_int(value.start),
245                Value::test_int(value.start + (if value.start <= value.end { 1 } else { -1 })),
246                Value::test_int(value.end),
247                value.inclusion,
248                Span::test_data(),
249            ) {
250                Ok(val) => val,
251                Err(e) => {
252                    panic!("{value:?}: {e:?}")
253                }
254            }
255        }
256    }
257
258    impl Expectation<'_> {
259        fn range(&self) -> IntRange {
260            self.range.into()
261        }
262    }
263
264    fn expectation(word: &str, range: impl Into<RangeHelper>) -> Expectation<'_> {
265        Expectation {
266            range: range.into(),
267            expected: word,
268        }
269    }
270
271    #[test]
272    fn substrings_indexes() {
273        let word = Value::test_string("andres");
274
275        let cases = vec![
276            expectation("", 0..0),
277            expectation("a", 0..=0),
278            expectation("an", 0..=1),
279            expectation("and", 0..=2),
280            expectation("andr", 0..=3),
281            expectation("andre", 0..=4),
282            expectation("andres", 0..=5),
283            expectation("andres", 0..=6),
284            expectation("a", 0..=-6),
285            expectation("an", 0..=-5),
286            expectation("and", 0..=-4),
287            expectation("andr", 0..=-3),
288            expectation("andre", 0..=-2),
289            expectation("andres", 0..=-1),
290            // str substring [ -4 , _ ]
291            // str substring   -4 ,
292            expectation("dres", -4..=i64::MAX),
293            expectation("", 0..=-110),
294            expectation("", 6..=0),
295            expectation("", 6..=-1),
296            expectation("", 6..=-2),
297            expectation("", 6..=-3),
298            expectation("", 6..=-4),
299            expectation("", 6..=-5),
300            expectation("", 6..=-6),
301        ];
302
303        for expectation in &cases {
304            println!("{expectation:?}");
305            let expected = expectation.expected;
306            let actual = action(
307                &word,
308                &Arguments {
309                    range: expectation.range(),
310                    cell_paths: None,
311                    graphemes: false,
312                },
313                Span::test_data(),
314            );
315
316            assert_eq!(actual, Value::test_string(expected));
317        }
318    }
319
320    #[test]
321    fn use_utf8_bytes() {
322        let word = Value::string(String::from("๐Ÿ‡ฏ๐Ÿ‡ตใปใ’ ใตใŒ ใดใ‚ˆ"), Span::test_data());
323
324        let range: RangeHelper = (4..=5).into();
325        let options = Arguments {
326            cell_paths: None,
327            range: range.into(),
328            graphemes: false,
329        };
330
331        let actual = action(&word, &options, Span::test_data());
332        assert_eq!(actual, Value::test_string("๏ฟฝ"));
333    }
334}