Skip to main content

nu_command/strings/str_/
substring.rs

1use std::ops::Bound;
2
3use crate::{grapheme_flags, grapheme_flags_const};
4use nu_cmd_base::input_handler::{CmdArgument, operate};
5use nu_engine::command_prelude::*;
6use nu_protocol::{IntRange, engine::StateWorkingSet};
7use unicode_segmentation::UnicodeSegmentation;
8
/// Marker type that carries the `str substring` command implementation.
#[derive(Clone)]
pub struct StrSubstring;
11
12struct Arguments {
13    range: IntRange,
14    cell_paths: Option<Vec<CellPath>>,
15    graphemes: bool,
16}
17
18impl CmdArgument for Arguments {
19    fn take_cell_paths(&mut self) -> Option<Vec<CellPath>> {
20        self.cell_paths.take()
21    }
22}
23
24impl Command for StrSubstring {
25    fn name(&self) -> &str {
26        "str substring"
27    }
28
29    fn signature(&self) -> Signature {
30        Signature::build("str substring")
31            .input_output_types(vec![
32                (Type::String, Type::String),
33                (Type::List(Box::new(Type::String)), Type::List(Box::new(Type::String))),
34                (Type::table(), Type::table()),
35                (Type::record(), Type::record()),
36            ])
37            .allow_variants_without_examples(true)
38            .switch(
39                "grapheme-clusters",
40                "Count indexes and split using grapheme clusters (all visible chars have length 1).",
41                Some('g'),
42            )
43            .switch(
44                "utf-8-bytes",
45                "Count indexes and split using UTF-8 bytes (default; non-ASCII chars have length 2+).",
46                Some('b'),
47            )
48            .required(
49                "range",
50                SyntaxShape::Any,
51                "The indexes to substring [start end].",
52            )
53            .rest(
54                "rest",
55                SyntaxShape::CellPath,
56                "For a data structure input, turn strings at the given cell paths into substrings.",
57            )
58            .category(Category::Strings)
59    }
60
61    fn description(&self) -> &str {
62        "Get part of a string. Note that the first character of a string is index 0."
63    }
64
65    fn search_terms(&self) -> Vec<&str> {
66        vec!["slice"]
67    }
68
69    fn is_const(&self) -> bool {
70        true
71    }
72
73    fn run(
74        &self,
75        engine_state: &EngineState,
76        stack: &mut Stack,
77        call: &Call,
78        input: PipelineData,
79    ) -> Result<PipelineData, ShellError> {
80        let range: IntRange = call.req(engine_state, stack, 0)?;
81
82        let cell_paths: Vec<CellPath> = call.rest(engine_state, stack, 1)?;
83        let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths);
84        let args = Arguments {
85            range,
86            cell_paths,
87            graphemes: grapheme_flags(engine_state, stack, call)?,
88        };
89        operate(action, args, input, call.head, engine_state.signals()).map(|mut pipeline| {
90            if let Some(metadata) = pipeline.metadata_mut() {
91                // a substring of text/json is not necessarily text/json itself
92                metadata.content_type = None;
93            }
94            pipeline
95        })
96    }
97
98    fn run_const(
99        &self,
100        working_set: &StateWorkingSet,
101        call: &Call,
102        input: PipelineData,
103    ) -> Result<PipelineData, ShellError> {
104        let range: IntRange = call.req_const(working_set, 0)?;
105
106        let cell_paths: Vec<CellPath> = call.rest_const(working_set, 1)?;
107        let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths);
108        let args = Arguments {
109            range,
110            cell_paths,
111            graphemes: grapheme_flags_const(working_set, call)?,
112        };
113        operate(
114            action,
115            args,
116            input,
117            call.head,
118            working_set.permanent().signals(),
119        )
120        .map(|mut pipeline| {
121            if let Some(metadata) = pipeline.metadata_mut() {
122                // a substring of text/json is not necessarily text/json itself
123                metadata.content_type = None;
124            }
125            pipeline
126        })
127    }
128
129    fn examples(&self) -> Vec<Example<'_>> {
130        vec![
131            Example {
132                description: "Get a substring \"nushell\" from the text \"good nushell\" using a range.",
133                example: " 'good nushell' | str substring 5..11",
134                result: Some(Value::test_string("nushell")),
135            },
136            Example {
137                description: "Count indexes and split using grapheme clusters.",
138                example: " '๐Ÿ‡ฏ๐Ÿ‡ตใปใ’ ใตใŒ ใดใ‚ˆ' | str substring --grapheme-clusters 4..5",
139                result: Some(Value::test_string("ใตใŒ")),
140            },
141            Example {
142                description: "sub string by negative index.",
143                example: " 'good nushell' | str substring 5..-2",
144                result: Some(Value::test_string("nushel")),
145            },
146        ]
147    }
148}
149
150fn action(input: &Value, args: &Arguments, head: Span) -> Value {
151    match input {
152        Value::String { val: s, .. } => {
153            let s = if args.graphemes {
154                let indices = s
155                    .grapheme_indices(true)
156                    .map(|(idx, s)| (idx, s.len()))
157                    .collect::<Vec<_>>();
158
159                let (idx_start, idx_end) = args.range.absolute_bounds(indices.len());
160                let idx_range = match idx_end {
161                    Bound::Excluded(end) => &indices[idx_start..end],
162                    Bound::Included(end) => &indices[idx_start..=end],
163                    Bound::Unbounded => &indices[idx_start..],
164                };
165
166                if let Some((start, end)) = idx_range.first().zip(idx_range.last()) {
167                    let start = start.0;
168                    let end = end.0 + end.1;
169                    s[start..end].to_owned()
170                } else {
171                    String::new()
172                }
173            } else {
174                let (start, end) = args.range.absolute_bounds(s.len());
175                let s = match end {
176                    Bound::Excluded(end) => &s.as_bytes()[start..end],
177                    Bound::Included(end) => &s.as_bytes()[start..=end],
178                    Bound::Unbounded => &s.as_bytes()[start..],
179                };
180                String::from_utf8_lossy(s).into_owned()
181            };
182            Value::string(s, head)
183        }
184        // Propagate errors by explicitly matching them before the final case.
185        Value::Error { .. } => input.clone(),
186        other => Value::error(
187            ShellError::UnsupportedInput {
188                msg: "Only string values are supported".into(),
189                input: format!("input type: {:?}", other.get_type()),
190                msg_span: head,
191                input_span: other.span(),
192            },
193            head,
194        ),
195    }
196}
197
#[cfg(test)]
// Several cases below deliberately build "backwards" ranges such as `6..=0`
// to check that they yield an empty string.
#[allow(clippy::reversed_empty_ranges)]
mod tests {
    use nu_protocol::IntRange;

    use super::{Arguments, Span, StrSubstring, Value, action};

    /// Runs every example declared by the command against its stated result.
    #[test]
    fn test_examples() -> nu_test_support::Result {
        nu_test_support::test().examples(StrSubstring)
    }

    /// Plain description of a range, buildable from Rust range literals and
    /// convertible into an [`IntRange`].
    #[derive(Clone, Copy, Debug)]
    struct RangeHelper {
        start: i64,
        end: i64,
        inclusion: nu_protocol::ast::RangeInclusion,
    }

    /// One test case: a range and the substring it should produce.
    #[derive(Debug)]
    struct Expectation<'a> {
        range: RangeHelper,
        expected: &'a str,
    }

    impl From<std::ops::RangeInclusive<i64>> for RangeHelper {
        /// `a..=b` becomes an inclusive range.
        fn from(value: std::ops::RangeInclusive<i64>) -> Self {
            RangeHelper {
                start: *value.start(),
                end: *value.end(),
                inclusion: nu_protocol::ast::RangeInclusion::Inclusive,
            }
        }
    }

    impl From<std::ops::Range<i64>> for RangeHelper {
        /// `a..b` becomes a right-exclusive range.
        fn from(value: std::ops::Range<i64>) -> Self {
            RangeHelper {
                start: value.start,
                end: value.end,
                inclusion: nu_protocol::ast::RangeInclusion::RightExclusive,
            }
        }
    }

    impl From<RangeHelper> for IntRange {
        /// Builds the `IntRange`, panicking with the offending helper and the
        /// error if construction fails.
        fn from(value: RangeHelper) -> Self {
            match IntRange::new(
                Value::test_int(value.start),
                // Second element of the range: one step towards `end`.
                Value::test_int(value.start + (if value.start <= value.end { 1 } else { -1 })),
                Value::test_int(value.end),
                value.inclusion,
                Span::test_data(),
            ) {
                Ok(val) => val,
                Err(e) => {
                    panic!("{value:?}: {e:?}")
                }
            }
        }
    }

    impl Expectation<'_> {
        /// The case's range as an `IntRange`.
        fn range(&self) -> IntRange {
            self.range.into()
        }
    }

    /// Shorthand for building a test case from the expected text and a range literal.
    fn expectation(word: &str, range: impl Into<RangeHelper>) -> Expectation<'_> {
        Expectation {
            range: range.into(),
            expected: word,
        }
    }

    /// Byte-indexed substrings of an ASCII word for positive, negative,
    /// oversized and backwards ranges.
    #[test]
    fn substrings_indexes() {
        let word = Value::test_string("andres");

        let cases = vec![
            expectation("", 0..0),
            expectation("a", 0..=0),
            expectation("an", 0..=1),
            expectation("and", 0..=2),
            expectation("andr", 0..=3),
            expectation("andre", 0..=4),
            expectation("andres", 0..=5),
            expectation("andres", 0..=6),
            expectation("a", 0..=-6),
            expectation("an", 0..=-5),
            expectation("and", 0..=-4),
            expectation("andr", 0..=-3),
            expectation("andre", 0..=-2),
            expectation("andres", 0..=-1),
            // str substring [ -4 , _ ]
            // str substring   -4 ,
            expectation("dres", -4..=i64::MAX),
            expectation("", 0..=-110),
            expectation("", 6..=0),
            expectation("", 6..=-1),
            expectation("", 6..=-2),
            expectation("", 6..=-3),
            expectation("", 6..=-4),
            expectation("", 6..=-5),
            expectation("", 6..=-6),
        ];

        for expectation in &cases {
            // Printed so a failing run shows which case was being checked.
            println!("{expectation:?}");
            let expected = expectation.expected;
            let actual = action(
                &word,
                &Arguments {
                    range: expectation.range(),
                    cell_paths: None,
                    graphemes: false,
                },
                Span::test_data(),
            );

            assert_eq!(actual, Value::test_string(expected));
        }
    }

    /// In byte mode a range may cut through a multi-byte character; the
    /// truncated bytes must come back as a replacement character.
    #[test]
    fn use_utf8_bytes() {
        let word = Value::string(String::from("🇯🇵ほげ ふが ぴよ"), Span::test_data());

        let range: RangeHelper = (4..=5).into();
        let options = Arguments {
            cell_paths: None,
            range: range.into(),
            graphemes: false,
        };

        let actual = action(&word, &options, Span::test_data());
        // Bytes 4..=5 are the first two bytes of the second 4-byte regional
        // indicator, which decode lossily to a single U+FFFD.
        assert_eq!(actual, Value::test_string("\u{FFFD}"));
    }
}