1use std::ops::Bound;
2
3use crate::{grapheme_flags, grapheme_flags_const};
4use nu_cmd_base::input_handler::{CmdArgument, operate};
5use nu_engine::command_prelude::*;
6use nu_protocol::{IntRange, engine::StateWorkingSet};
7use unicode_segmentation::UnicodeSegmentation;
8
/// Command type behind the `str substring` shell command.
///
/// The type carries no state; all behavior lives in its `Command`
/// implementation.
#[derive(Clone)]
pub struct StrSubstring;
11
12struct Arguments {
13 range: IntRange,
14 cell_paths: Option<Vec<CellPath>>,
15 graphemes: bool,
16}
17
18impl CmdArgument for Arguments {
19 fn take_cell_paths(&mut self) -> Option<Vec<CellPath>> {
20 self.cell_paths.take()
21 }
22}
23
24impl Command for StrSubstring {
25 fn name(&self) -> &str {
26 "str substring"
27 }
28
29 fn signature(&self) -> Signature {
30 Signature::build("str substring")
31 .input_output_types(vec![
32 (Type::String, Type::String),
33 (Type::List(Box::new(Type::String)), Type::List(Box::new(Type::String))),
34 (Type::table(), Type::table()),
35 (Type::record(), Type::record()),
36 ])
37 .allow_variants_without_examples(true)
38 .switch(
39 "grapheme-clusters",
40 "count indexes and split using grapheme clusters (all visible chars have length 1)",
41 Some('g'),
42 )
43 .switch(
44 "utf-8-bytes",
45 "count indexes and split using UTF-8 bytes (default; non-ASCII chars have length 2+)",
46 Some('b'),
47 )
48 .required(
49 "range",
50 SyntaxShape::Any,
51 "The indexes to substring [start end].",
52 )
53 .rest(
54 "rest",
55 SyntaxShape::CellPath,
56 "For a data structure input, turn strings at the given cell paths into substrings.",
57 )
58 .category(Category::Strings)
59 }
60
61 fn description(&self) -> &str {
62 "Get part of a string. Note that the first character of a string is index 0."
63 }
64
65 fn search_terms(&self) -> Vec<&str> {
66 vec!["slice"]
67 }
68
69 fn is_const(&self) -> bool {
70 true
71 }
72
73 fn run(
74 &self,
75 engine_state: &EngineState,
76 stack: &mut Stack,
77 call: &Call,
78 input: PipelineData,
79 ) -> Result<PipelineData, ShellError> {
80 let range: IntRange = call.req(engine_state, stack, 0)?;
81
82 let cell_paths: Vec<CellPath> = call.rest(engine_state, stack, 1)?;
83 let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths);
84 let args = Arguments {
85 range,
86 cell_paths,
87 graphemes: grapheme_flags(engine_state, stack, call)?,
88 };
89 operate(action, args, input, call.head, engine_state.signals())
90 }
91
92 fn run_const(
93 &self,
94 working_set: &StateWorkingSet,
95 call: &Call,
96 input: PipelineData,
97 ) -> Result<PipelineData, ShellError> {
98 let range: IntRange = call.req_const(working_set, 0)?;
99
100 let cell_paths: Vec<CellPath> = call.rest_const(working_set, 1)?;
101 let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths);
102 let args = Arguments {
103 range,
104 cell_paths,
105 graphemes: grapheme_flags_const(working_set, call)?,
106 };
107 operate(
108 action,
109 args,
110 input,
111 call.head,
112 working_set.permanent().signals(),
113 )
114 }
115
116 fn examples(&self) -> Vec<Example> {
117 vec![
118 Example {
119 description: "Get a substring \"nushell\" from the text \"good nushell\" using a range",
120 example: " 'good nushell' | str substring 5..11",
121 result: Some(Value::test_string("nushell")),
122 },
123 Example {
124 description: "Count indexes and split using grapheme clusters",
125 example: " '๐ฏ๐ตใปใ ใตใ ใดใ' | str substring --grapheme-clusters 4..5",
126 result: Some(Value::test_string("ใตใ")),
127 },
128 Example {
129 description: "sub string by negative index",
130 example: " 'good nushell' | str substring 5..-2",
131 result: Some(Value::test_string("nushel")),
132 },
133 ]
134 }
135}
136
137fn action(input: &Value, args: &Arguments, head: Span) -> Value {
138 match input {
139 Value::String { val: s, .. } => {
140 let s = if args.graphemes {
141 let indices = s
142 .grapheme_indices(true)
143 .map(|(idx, s)| (idx, s.len()))
144 .collect::<Vec<_>>();
145
146 let (idx_start, idx_end) = args.range.absolute_bounds(indices.len());
147 let idx_range = match idx_end {
148 Bound::Excluded(end) => &indices[idx_start..end],
149 Bound::Included(end) => &indices[idx_start..=end],
150 Bound::Unbounded => &indices[idx_start..],
151 };
152
153 if let Some((start, end)) = idx_range.first().zip(idx_range.last()) {
154 let start = start.0;
155 let end = end.0 + end.1;
156 s[start..end].to_owned()
157 } else {
158 String::new()
159 }
160 } else {
161 let (start, end) = args.range.absolute_bounds(s.len());
162 let s = match end {
163 Bound::Excluded(end) => &s.as_bytes()[start..end],
164 Bound::Included(end) => &s.as_bytes()[start..=end],
165 Bound::Unbounded => &s.as_bytes()[start..],
166 };
167 String::from_utf8_lossy(s).into_owned()
168 };
169 Value::string(s, head)
170 }
171 Value::Error { .. } => input.clone(),
173 other => Value::error(
174 ShellError::UnsupportedInput {
175 msg: "Only string values are supported".into(),
176 input: format!("input type: {:?}", other.get_type()),
177 msg_span: head,
178 input_span: other.span(),
179 },
180 head,
181 ),
182 }
183}
184
#[cfg(test)]
// The cases below deliberately build reversed ranges such as `6..=0` to check
// that they produce an empty substring.
#[allow(clippy::reversed_empty_ranges)]
mod tests {
    use nu_protocol::IntRange;

    use super::{Arguments, Span, StrSubstring, Value, action};

    #[test]
    fn test_examples() {
        use crate::test_examples;

        test_examples(StrSubstring {})
    }

    /// Plain-data description of a range, convertible from Rust range
    /// literals and into an `IntRange`.
    #[derive(Clone, Copy, Debug)]
    struct RangeHelper {
        start: i64,
        end: i64,
        inclusion: nu_protocol::ast::RangeInclusion,
    }

    /// A range together with the substring it is expected to select.
    #[derive(Debug)]
    struct Expectation<'a> {
        range: RangeHelper,
        expected: &'a str,
    }

    impl From<std::ops::RangeInclusive<i64>> for RangeHelper {
        fn from(value: std::ops::RangeInclusive<i64>) -> Self {
            RangeHelper {
                start: *value.start(),
                end: *value.end(),
                inclusion: nu_protocol::ast::RangeInclusion::Inclusive,
            }
        }
    }

    impl From<std::ops::Range<i64>> for RangeHelper {
        fn from(value: std::ops::Range<i64>) -> Self {
            RangeHelper {
                start: value.start,
                end: value.end,
                inclusion: nu_protocol::ast::RangeInclusion::RightExclusive,
            }
        }
    }

    impl From<RangeHelper> for IntRange {
        fn from(value: RangeHelper) -> Self {
            // The second element is one step from `start` in the direction of `end`.
            let step = if value.start <= value.end { 1 } else { -1 };
            match IntRange::new(
                Value::test_int(value.start),
                Value::test_int(value.start + step),
                Value::test_int(value.end),
                value.inclusion,
                Span::test_data(),
            ) {
                Ok(val) => val,
                Err(e) => {
                    panic!("{value:?}: {e:?}")
                }
            }
        }
    }

    impl Expectation<'_> {
        fn range(&self) -> IntRange {
            self.range.into()
        }
    }

    /// Builds an `Expectation`; the returned value borrows `word`.
    fn expectation(word: &str, range: impl Into<RangeHelper>) -> Expectation<'_> {
        Expectation {
            range: range.into(),
            expected: word,
        }
    }

    #[test]
    fn substrings_indexes() {
        let word = Value::test_string("andres");

        let cases = vec![
            expectation("", 0..0),
            expectation("a", 0..=0),
            expectation("an", 0..=1),
            expectation("and", 0..=2),
            expectation("andr", 0..=3),
            expectation("andre", 0..=4),
            expectation("andres", 0..=5),
            // An end index past the last character still selects the whole word.
            expectation("andres", 0..=6),
            // Negative end indexes.
            expectation("a", 0..=-6),
            expectation("an", 0..=-5),
            expectation("and", 0..=-4),
            expectation("andr", 0..=-3),
            expectation("andre", 0..=-2),
            expectation("andres", 0..=-1),
            expectation("dres", -4..=i64::MAX),
            // Ranges that select nothing.
            expectation("", 0..=-110),
            expectation("", 6..=0),
            expectation("", 6..=-1),
            expectation("", 6..=-2),
            expectation("", 6..=-3),
            expectation("", 6..=-4),
            expectation("", 6..=-5),
            expectation("", 6..=-6),
        ];

        for expectation in &cases {
            // Printed so a failing case can be identified in the test output.
            println!("{expectation:?}");
            let expected = expectation.expected;
            let actual = action(
                &word,
                &Arguments {
                    range: expectation.range(),
                    cell_paths: None,
                    graphemes: false,
                },
                Span::test_data(),
            );

            assert_eq!(actual, Value::test_string(expected));
        }
    }

    #[test]
    fn use_utf8_bytes() {
        let word = Value::string(String::from("🇯🇵ほげ ふが ぴよ"), Span::test_data());

        let range: RangeHelper = (4..=5).into();
        let options = Arguments {
            cell_paths: None,
            range: range.into(),
            graphemes: false,
        };

        let actual = action(&word, &options, Span::test_data());
        // Bytes 4..=5 are the first two bytes of the four-byte second
        // regional-indicator character, so lossy decoding yields a single
        // U+FFFD replacement character.
        assert_eq!(actual, Value::test_string("\u{FFFD}"));
    }
}