1use std::ops::Bound;
2
3use crate::{grapheme_flags, grapheme_flags_const};
4use nu_cmd_base::input_handler::{CmdArgument, operate};
5use nu_engine::command_prelude::*;
6use nu_protocol::{IntRange, engine::StateWorkingSet};
7use unicode_segmentation::UnicodeSegmentation;
8
/// The `str substring` command: extracts part of a string selected by an index range.
///
/// The type carries no state; all behavior lives in its `Command` implementation.
#[derive(Clone)]
pub struct StrSubstring;
11
/// Arguments collected from a `str substring` call and handed to `action` for each value.
struct Arguments {
    /// Range of indexes to keep; resolved against the input's length inside `action`.
    range: IntRange,
    /// Cell paths given as rest arguments, or `None` when the call supplied none.
    cell_paths: Option<Vec<CellPath>>,
    /// When `true`, indexes count grapheme clusters; otherwise they count UTF-8 bytes.
    graphemes: bool,
}
17
18impl CmdArgument for Arguments {
19 fn take_cell_paths(&mut self) -> Option<Vec<CellPath>> {
20 self.cell_paths.take()
21 }
22}
23
24impl Command for StrSubstring {
25 fn name(&self) -> &str {
26 "str substring"
27 }
28
29 fn signature(&self) -> Signature {
30 Signature::build("str substring")
31 .input_output_types(vec![
32 (Type::String, Type::String),
33 (Type::List(Box::new(Type::String)), Type::List(Box::new(Type::String))),
34 (Type::table(), Type::table()),
35 (Type::record(), Type::record()),
36 ])
37 .allow_variants_without_examples(true)
38 .switch(
39 "grapheme-clusters",
40 "Count indexes and split using grapheme clusters (all visible chars have length 1).",
41 Some('g'),
42 )
43 .switch(
44 "utf-8-bytes",
45 "Count indexes and split using UTF-8 bytes (default; non-ASCII chars have length 2+).",
46 Some('b'),
47 )
48 .required(
49 "range",
50 SyntaxShape::Any,
51 "The indexes to substring [start end].",
52 )
53 .rest(
54 "rest",
55 SyntaxShape::CellPath,
56 "For a data structure input, turn strings at the given cell paths into substrings.",
57 )
58 .category(Category::Strings)
59 }
60
61 fn description(&self) -> &str {
62 "Get part of a string. Note that the first character of a string is index 0."
63 }
64
65 fn search_terms(&self) -> Vec<&str> {
66 vec!["slice"]
67 }
68
69 fn is_const(&self) -> bool {
70 true
71 }
72
73 fn run(
74 &self,
75 engine_state: &EngineState,
76 stack: &mut Stack,
77 call: &Call,
78 input: PipelineData,
79 ) -> Result<PipelineData, ShellError> {
80 let range: IntRange = call.req(engine_state, stack, 0)?;
81
82 let cell_paths: Vec<CellPath> = call.rest(engine_state, stack, 1)?;
83 let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths);
84 let args = Arguments {
85 range,
86 cell_paths,
87 graphemes: grapheme_flags(engine_state, stack, call)?,
88 };
89 operate(action, args, input, call.head, engine_state.signals()).map(|mut pipeline| {
90 if let Some(metadata) = pipeline.metadata_mut() {
91 metadata.content_type = None;
93 }
94 pipeline
95 })
96 }
97
98 fn run_const(
99 &self,
100 working_set: &StateWorkingSet,
101 call: &Call,
102 input: PipelineData,
103 ) -> Result<PipelineData, ShellError> {
104 let range: IntRange = call.req_const(working_set, 0)?;
105
106 let cell_paths: Vec<CellPath> = call.rest_const(working_set, 1)?;
107 let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths);
108 let args = Arguments {
109 range,
110 cell_paths,
111 graphemes: grapheme_flags_const(working_set, call)?,
112 };
113 operate(
114 action,
115 args,
116 input,
117 call.head,
118 working_set.permanent().signals(),
119 )
120 .map(|mut pipeline| {
121 if let Some(metadata) = pipeline.metadata_mut() {
122 metadata.content_type = None;
124 }
125 pipeline
126 })
127 }
128
129 fn examples(&self) -> Vec<Example<'_>> {
130 vec![
131 Example {
132 description: "Get a substring \"nushell\" from the text \"good nushell\" using a range.",
133 example: " 'good nushell' | str substring 5..11",
134 result: Some(Value::test_string("nushell")),
135 },
136 Example {
137 description: "Count indexes and split using grapheme clusters.",
138 example: " '๐ฏ๐ตใปใ ใตใ ใดใ' | str substring --grapheme-clusters 4..5",
139 result: Some(Value::test_string("ใตใ")),
140 },
141 Example {
142 description: "sub string by negative index.",
143 example: " 'good nushell' | str substring 5..-2",
144 result: Some(Value::test_string("nushel")),
145 },
146 ]
147 }
148}
149
150fn action(input: &Value, args: &Arguments, head: Span) -> Value {
151 match input {
152 Value::String { val: s, .. } => {
153 let s = if args.graphemes {
154 let indices = s
155 .grapheme_indices(true)
156 .map(|(idx, s)| (idx, s.len()))
157 .collect::<Vec<_>>();
158
159 let (idx_start, idx_end) = args.range.absolute_bounds(indices.len());
160 let idx_range = match idx_end {
161 Bound::Excluded(end) => &indices[idx_start..end],
162 Bound::Included(end) => &indices[idx_start..=end],
163 Bound::Unbounded => &indices[idx_start..],
164 };
165
166 if let Some((start, end)) = idx_range.first().zip(idx_range.last()) {
167 let start = start.0;
168 let end = end.0 + end.1;
169 s[start..end].to_owned()
170 } else {
171 String::new()
172 }
173 } else {
174 let (start, end) = args.range.absolute_bounds(s.len());
175 let s = match end {
176 Bound::Excluded(end) => &s.as_bytes()[start..end],
177 Bound::Included(end) => &s.as_bytes()[start..=end],
178 Bound::Unbounded => &s.as_bytes()[start..],
179 };
180 String::from_utf8_lossy(s).into_owned()
181 };
182 Value::string(s, head)
183 }
184 Value::Error { .. } => input.clone(),
186 other => Value::error(
187 ShellError::UnsupportedInput {
188 msg: "Only string values are supported".into(),
189 input: format!("input type: {:?}", other.get_type()),
190 msg_span: head,
191 input_span: other.span(),
192 },
193 head,
194 ),
195 }
196}
197
#[cfg(test)]
// The cases below spell nushell ranges with Rust range literals such as `6..=0`; they are only
// converted into `RangeHelper`, never iterated, so the "reversed empty range" lint does not apply.
#[allow(clippy::reversed_empty_ranges)]
mod tests {
    use nu_protocol::IntRange;

    use super::{Arguments, Span, StrSubstring, Value, action};

    /// Runs every example declared by the command and checks its stated result.
    #[test]
    fn test_examples() -> nu_test_support::Result {
        nu_test_support::test().examples(StrSubstring)
    }

    /// Plain description of a range (start, end, inclusivity) that can be built from a Rust
    /// range literal and converted into an `IntRange`.
    #[derive(Clone, Copy, Debug)]
    struct RangeHelper {
        start: i64,
        end: i64,
        inclusion: nu_protocol::ast::RangeInclusion,
    }

    /// A range together with the substring it is expected to select.
    #[derive(Debug)]
    struct Expectation<'a> {
        range: RangeHelper,
        expected: &'a str,
    }

    impl From<std::ops::RangeInclusive<i64>> for RangeHelper {
        fn from(value: std::ops::RangeInclusive<i64>) -> Self {
            RangeHelper {
                start: *value.start(),
                end: *value.end(),
                inclusion: nu_protocol::ast::RangeInclusion::Inclusive,
            }
        }
    }

    impl From<std::ops::Range<i64>> for RangeHelper {
        fn from(value: std::ops::Range<i64>) -> Self {
            RangeHelper {
                start: value.start,
                end: value.end,
                inclusion: nu_protocol::ast::RangeInclusion::RightExclusive,
            }
        }
    }

    impl From<RangeHelper> for IntRange {
        /// Builds the `IntRange`, panicking with the offending helper if construction fails.
        fn from(value: RangeHelper) -> Self {
            // The second value of the range sits one step from `start`, in the direction of `end`.
            let step = if value.start <= value.end { 1 } else { -1 };
            match IntRange::new(
                Value::test_int(value.start),
                Value::test_int(value.start + step),
                Value::test_int(value.end),
                value.inclusion,
                Span::test_data(),
            ) {
                Ok(val) => val,
                Err(e) => {
                    panic!("{value:?}: {e:?}")
                }
            }
        }
    }

    impl Expectation<'_> {
        fn range(&self) -> IntRange {
            self.range.into()
        }
    }

    /// Pairs the expected substring `word` with the range that should produce it.
    fn expectation(word: &str, range: impl Into<RangeHelper>) -> Expectation<'_> {
        Expectation {
            range: range.into(),
            expected: word,
        }
    }

    /// Byte-indexed substrings of an ASCII word, covering positive, negative, oversized and
    /// reversed bounds.
    #[test]
    fn substrings_indexes() {
        let word = Value::test_string("andres");

        let cases = vec![
            expectation("", 0..0),
            expectation("a", 0..=0),
            expectation("an", 0..=1),
            expectation("and", 0..=2),
            expectation("andr", 0..=3),
            expectation("andre", 0..=4),
            expectation("andres", 0..=5),
            expectation("andres", 0..=6),
            expectation("a", 0..=-6),
            expectation("an", 0..=-5),
            expectation("and", 0..=-4),
            expectation("andr", 0..=-3),
            expectation("andre", 0..=-2),
            expectation("andres", 0..=-1),
            expectation("dres", -4..=i64::MAX),
            expectation("", 0..=-110),
            expectation("", 6..=0),
            expectation("", 6..=-1),
            expectation("", 6..=-2),
            expectation("", 6..=-3),
            expectation("", 6..=-4),
            expectation("", 6..=-5),
            expectation("", 6..=-6),
        ];

        for case in &cases {
            let actual = action(
                &word,
                &Arguments {
                    range: case.range(),
                    cell_paths: None,
                    graphemes: false,
                },
                Span::test_data(),
            );

            // The failure message names the case so a broken range is easy to spot.
            assert_eq!(
                actual,
                Value::test_string(case.expected),
                "unexpected substring for {case:?}"
            );
        }
    }

    /// Byte indexing may cut through a multi-byte character; the fragment is decoded lossily.
    #[test]
    fn use_utf8_bytes() {
        let word = Value::string(String::from("🇯🇵ほげ ふが ぴよ"), Span::test_data());

        let range: RangeHelper = (4..=5).into();
        let options = Arguments {
            cell_paths: None,
            range: range.into(),
            graphemes: false,
        };

        let actual = action(&word, &options, Span::test_data());
        // Bytes 4..=5 are the first two bytes of the second 4-byte code point of the flag: an
        // incomplete UTF-8 sequence, which lossy decoding turns into a single U+FFFD.
        assert_eq!(actual, Value::test_string("\u{FFFD}"));
    }
}