1use std::ops::Bound;
2
3use crate::{grapheme_flags, grapheme_flags_const};
4use nu_cmd_base::input_handler::{CmdArgument, operate};
5use nu_engine::command_prelude::*;
6use nu_protocol::{IntRange, engine::StateWorkingSet};
7use unicode_segmentation::UnicodeSegmentation;
8
/// Command type backing `str substring`.
///
/// The type carries no state; its behavior lives in its `Command` implementation.
#[derive(Clone)]
pub struct StrSubstring;
11
12struct Arguments {
13 range: IntRange,
14 cell_paths: Option<Vec<CellPath>>,
15 graphemes: bool,
16}
17
18impl CmdArgument for Arguments {
19 fn take_cell_paths(&mut self) -> Option<Vec<CellPath>> {
20 self.cell_paths.take()
21 }
22}
23
24impl Command for StrSubstring {
25 fn name(&self) -> &str {
26 "str substring"
27 }
28
29 fn signature(&self) -> Signature {
30 Signature::build("str substring")
31 .input_output_types(vec![
32 (Type::String, Type::String),
33 (Type::List(Box::new(Type::String)), Type::List(Box::new(Type::String))),
34 (Type::table(), Type::table()),
35 (Type::record(), Type::record()),
36 ])
37 .allow_variants_without_examples(true)
38 .switch(
39 "grapheme-clusters",
40 "count indexes and split using grapheme clusters (all visible chars have length 1)",
41 Some('g'),
42 )
43 .switch(
44 "utf-8-bytes",
45 "count indexes and split using UTF-8 bytes (default; non-ASCII chars have length 2+)",
46 Some('b'),
47 )
48 .required(
49 "range",
50 SyntaxShape::Any,
51 "The indexes to substring [start end].",
52 )
53 .rest(
54 "rest",
55 SyntaxShape::CellPath,
56 "For a data structure input, turn strings at the given cell paths into substrings.",
57 )
58 .category(Category::Strings)
59 }
60
61 fn description(&self) -> &str {
62 "Get part of a string. Note that the first character of a string is index 0."
63 }
64
65 fn search_terms(&self) -> Vec<&str> {
66 vec!["slice"]
67 }
68
69 fn is_const(&self) -> bool {
70 true
71 }
72
73 fn run(
74 &self,
75 engine_state: &EngineState,
76 stack: &mut Stack,
77 call: &Call,
78 input: PipelineData,
79 ) -> Result<PipelineData, ShellError> {
80 let range: IntRange = call.req(engine_state, stack, 0)?;
81
82 let cell_paths: Vec<CellPath> = call.rest(engine_state, stack, 1)?;
83 let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths);
84 let args = Arguments {
85 range,
86 cell_paths,
87 graphemes: grapheme_flags(engine_state, stack, call)?,
88 };
89 operate(action, args, input, call.head, engine_state.signals()).map(|pipeline| {
90 let metadata = pipeline.metadata().map(|m| m.with_content_type(None));
92 pipeline.set_metadata(metadata)
93 })
94 }
95
96 fn run_const(
97 &self,
98 working_set: &StateWorkingSet,
99 call: &Call,
100 input: PipelineData,
101 ) -> Result<PipelineData, ShellError> {
102 let range: IntRange = call.req_const(working_set, 0)?;
103
104 let cell_paths: Vec<CellPath> = call.rest_const(working_set, 1)?;
105 let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths);
106 let args = Arguments {
107 range,
108 cell_paths,
109 graphemes: grapheme_flags_const(working_set, call)?,
110 };
111 operate(
112 action,
113 args,
114 input,
115 call.head,
116 working_set.permanent().signals(),
117 )
118 .map(|pipeline| {
119 let metadata = pipeline.metadata().map(|m| m.with_content_type(None));
121 pipeline.set_metadata(metadata)
122 })
123 }
124
125 fn examples(&self) -> Vec<Example<'_>> {
126 vec![
127 Example {
128 description: "Get a substring \"nushell\" from the text \"good nushell\" using a range",
129 example: " 'good nushell' | str substring 5..11",
130 result: Some(Value::test_string("nushell")),
131 },
132 Example {
133 description: "Count indexes and split using grapheme clusters",
134 example: " '๐ฏ๐ตใปใ ใตใ ใดใ' | str substring --grapheme-clusters 4..5",
135 result: Some(Value::test_string("ใตใ")),
136 },
137 Example {
138 description: "sub string by negative index",
139 example: " 'good nushell' | str substring 5..-2",
140 result: Some(Value::test_string("nushel")),
141 },
142 ]
143 }
144}
145
146fn action(input: &Value, args: &Arguments, head: Span) -> Value {
147 match input {
148 Value::String { val: s, .. } => {
149 let s = if args.graphemes {
150 let indices = s
151 .grapheme_indices(true)
152 .map(|(idx, s)| (idx, s.len()))
153 .collect::<Vec<_>>();
154
155 let (idx_start, idx_end) = args.range.absolute_bounds(indices.len());
156 let idx_range = match idx_end {
157 Bound::Excluded(end) => &indices[idx_start..end],
158 Bound::Included(end) => &indices[idx_start..=end],
159 Bound::Unbounded => &indices[idx_start..],
160 };
161
162 if let Some((start, end)) = idx_range.first().zip(idx_range.last()) {
163 let start = start.0;
164 let end = end.0 + end.1;
165 s[start..end].to_owned()
166 } else {
167 String::new()
168 }
169 } else {
170 let (start, end) = args.range.absolute_bounds(s.len());
171 let s = match end {
172 Bound::Excluded(end) => &s.as_bytes()[start..end],
173 Bound::Included(end) => &s.as_bytes()[start..=end],
174 Bound::Unbounded => &s.as_bytes()[start..],
175 };
176 String::from_utf8_lossy(s).into_owned()
177 };
178 Value::string(s, head)
179 }
180 Value::Error { .. } => input.clone(),
182 other => Value::error(
183 ShellError::UnsupportedInput {
184 msg: "Only string values are supported".into(),
185 input: format!("input type: {:?}", other.get_type()),
186 msg_span: head,
187 input_span: other.span(),
188 },
189 head,
190 ),
191 }
192}
193
#[cfg(test)]
// Several cases below deliberately use ranges whose end precedes their start.
#[allow(clippy::reversed_empty_ranges)]
mod tests {
    use nu_protocol::IntRange;

    use super::{Arguments, Span, StrSubstring, Value, action};

    /// Checks the command's documented examples with the crate's example checker.
    #[test]
    fn test_examples() {
        use crate::test_examples;

        test_examples(StrSubstring {})
    }

    /// Plain-integer description of a range, built from a std range literal
    /// and later converted into an [`IntRange`].
    #[derive(Clone, Copy, Debug)]
    struct RangeHelper {
        start: i64,
        end: i64,
        inclusion: nu_protocol::ast::RangeInclusion,
    }

    /// A range together with the substring it is expected to select.
    #[derive(Debug)]
    struct Expectation<'a> {
        range: RangeHelper,
        expected: &'a str,
    }

    impl From<std::ops::RangeInclusive<i64>> for RangeHelper {
        fn from(value: std::ops::RangeInclusive<i64>) -> Self {
            RangeHelper {
                start: *value.start(),
                end: *value.end(),
                inclusion: nu_protocol::ast::RangeInclusion::Inclusive,
            }
        }
    }

    impl From<std::ops::Range<i64>> for RangeHelper {
        fn from(value: std::ops::Range<i64>) -> Self {
            RangeHelper {
                start: value.start,
                end: value.end,
                inclusion: nu_protocol::ast::RangeInclusion::RightExclusive,
            }
        }
    }

    impl From<RangeHelper> for IntRange {
        /// Builds the `IntRange`, panicking with the helper and the error if
        /// construction fails.
        fn from(value: RangeHelper) -> Self {
            // The second argument is one step away from `start`, towards `end`.
            // NOTE(review): presumably the range's "next" element — confirm
            // against `IntRange::new`.
            let step = if value.start <= value.end { 1 } else { -1 };
            IntRange::new(
                Value::test_int(value.start),
                Value::test_int(value.start + step),
                Value::test_int(value.end),
                value.inclusion,
                Span::test_data(),
            )
            .unwrap_or_else(|e| panic!("{value:?}: {e:?}"))
        }
    }

    impl Expectation<'_> {
        /// The expectation's range as an `IntRange`.
        fn range(&self) -> IntRange {
            self.range.into()
        }
    }

    /// Pairs an expected substring with the range that should produce it.
    fn expectation(word: &str, range: impl Into<RangeHelper>) -> Expectation<'_> {
        Expectation {
            range: range.into(),
            expected: word,
        }
    }

    /// Byte-indexed substrings of an ASCII word for a table of ranges.
    #[test]
    fn substrings_indexes() {
        let word = Value::test_string("andres");

        let cases = vec![
            expectation("", 0..0),
            expectation("a", 0..=0),
            expectation("an", 0..=1),
            expectation("and", 0..=2),
            expectation("andr", 0..=3),
            expectation("andre", 0..=4),
            expectation("andres", 0..=5),
            expectation("andres", 0..=6),
            expectation("a", 0..=-6),
            expectation("an", 0..=-5),
            expectation("and", 0..=-4),
            expectation("andr", 0..=-3),
            expectation("andre", 0..=-2),
            expectation("andres", 0..=-1),
            expectation("dres", -4..=i64::MAX),
            // The remaining ranges are all expected to select nothing.
            expectation("", 0..=-110),
            expectation("", 6..=0),
            expectation("", 6..=-1),
            expectation("", 6..=-2),
            expectation("", 6..=-3),
            expectation("", 6..=-4),
            expectation("", 6..=-5),
            expectation("", 6..=-6),
        ];

        for expectation in &cases {
            // Printed so a failing run shows which case was being checked.
            println!("{expectation:?}");
            let expected = expectation.expected;
            let actual = action(
                &word,
                &Arguments {
                    range: expectation.range(),
                    cell_paths: None,
                    graphemes: false,
                },
                Span::test_data(),
            );

            assert_eq!(actual, Value::test_string(expected));
        }
    }

    /// In byte mode a range that cuts through a multi-byte character yields a
    /// replacement character.
    #[test]
    fn use_utf8_bytes() {
        let word = Value::string(String::from("🇯🇵ほげ ふが ぴよ"), Span::test_data());

        // Each of the two regional-indicator code points of the flag is four
        // bytes long, so bytes 4..=5 are the first half of the second one.
        let range: RangeHelper = (4..=5).into();
        let options = Arguments {
            cell_paths: None,
            range: range.into(),
            graphemes: false,
        };

        let actual = action(&word, &options, Span::test_data());
        // The truncated sequence is rendered as a single U+FFFD.
        assert_eq!(actual, Value::test_string("\u{FFFD}"));
    }
}