Skip to main content

zsh/
subscript.rs

1//! Array subscript parsing and indexing for zshrs
2//!
3//! Direct port from zsh/Src/params.c getindex() and getarg() functions.
4//!
5//! Handles array subscript syntax including:
6//! - Simple indices: `arr[1]`, `arr[-1]`
7//! - Ranges: `arr[1,5]`, `arr[2,-1]`
8//! - All elements: `arr[@]`, `arr[*]`
9//! - Subscript flags: `arr[(r)pattern]`, `arr[(i)string]`, etc.
10
11// Pattern matching support - uses crate::pattern module when needed
12
/// Scan flags for parameter matching.
///
/// Port from zsh.h `SCANPM_*` constants. Each flag occupies its own bit of a
/// `u32`, so several flags can be OR-ed into one word and tested with `&`.
///
/// NOTE(review): only the names are taken from zsh.h; confirm the numeric
/// values against the C header before relying on them outside this crate.
pub mod scanflags {
    pub const WANTVALS: u32 = 0x0001;
    pub const WANTKEYS: u32 = 0x0002;
    pub const WANTINDEX: u32 = 0x0004;
    pub const MATCHKEY: u32 = 0x0008;
    pub const MATCHVAL: u32 = 0x0010;
    pub const MATCHMANY: u32 = 0x0020;
    pub const KEYMATCH: u32 = 0x0040;
    pub const DQUOTED: u32 = 0x0080;
    pub const NOEXEC: u32 = 0x0100;
    pub const ISVAR_AT: u32 = 0x0200;
    pub const CHECKING: u32 = 0x0400;
}
28
/// Value flags.
///
/// Port from zsh.h `VALFLAG_*` constants. Each flag is a single bit of a
/// `u32` so the flags can be combined with `|`.
pub mod valflags {
    pub const INV: u32 = 0x1;
    pub const EMPTY: u32 = 0x2;
}
35
/// Result of parsing a subscript: the selected bounds plus two flag words.
///
/// Port from zsh Value struct fields relevant to subscripting.
#[derive(Debug, Clone, Default)]
pub struct SubscriptValue {
    /// First bound of the selection (may be negative).
    pub start: i64,
    /// Second bound of the selection (may be negative).
    pub end: i64,
    /// OR-ed `scanflags` bits.
    pub scan_flags: u32,
    /// OR-ed `valflags` bits.
    pub val_flags: u32,
}

impl SubscriptValue {
    /// A value with both bounds at 0 and no flags set.
    pub fn new() -> Self {
        Self {
            start: 0,
            end: 0,
            scan_flags: 0,
            val_flags: 0,
        }
    }

    /// A value selecting `idx`: `start` is `idx` and `end` is `idx + 1`.
    pub fn single(idx: i64) -> Self {
        Self::range(idx, idx + 1)
    }

    /// A value with the given bounds and no flags set.
    pub fn range(start: i64, end: i64) -> Self {
        Self {
            start,
            end,
            ..Self::default()
        }
    }

    /// The "every element" value, encoded as the bounds `(0, -1)`.
    pub fn all() -> Self {
        Self::range(0, -1)
    }

    /// True when the bounds are exactly the `(0, -1)` pair used by [`Self::all`].
    pub fn is_all(&self) -> bool {
        matches!((self.start, self.end), (0, -1))
    }
}
82
/// Cursor and flag state used while parsing one subscript argument.
///
/// Port from getarg() local variables.
struct GetArgContext<'a> {
    /// The subscript text being parsed.
    s: &'a str,
    /// Byte offset of the cursor into `s`; always on a char boundary.
    pos: usize,
    /// Inverse-indexing marker; starts out `false`.
    inv: bool,
    /// Set by the `r`, `R`, `k`, `K`, `i` and `I` flags.
    rev: bool,
    /// Set by the `i` and `I` flags.
    ind: bool,
    /// Set by the upper-case flags `R`, `K` and `I`.
    down: bool,
    /// Set by the `w` and `f` flags.
    word: bool,
    /// Set by the `k` and `K` flags when the parameter is a hash.
    keymatch: bool,
    /// Set when a `b` flag was seen.
    hasbeg: bool,
    /// Argument of the `n` flag; defaults to 1.
    num: i64,
    /// Argument of the `b` flag; defaults to 0.
    beg: i64,
    /// Separator from the `s` flag, or `"\n"` from the `f` flag.
    sep: Option<String>,
    /// Whether the subscripted parameter is a hash.
    is_hash: bool,
    /// Whether KSH_ARRAYS indexing is in effect.
    ksh_arrays: bool,
}

impl<'a> GetArgContext<'a> {
    /// Start parsing `s` at offset 0 with every flag cleared and `num` at 1.
    fn new(s: &'a str, is_hash: bool, ksh_arrays: bool) -> Self {
        Self {
            s,
            is_hash,
            ksh_arrays,
            pos: 0,
            num: 1,
            beg: 0,
            sep: None,
            inv: false,
            rev: false,
            ind: false,
            down: false,
            word: false,
            keymatch: false,
            hasbeg: false,
        }
    }

    /// The character under the cursor, or `None` at the end of the input.
    fn current(&self) -> Option<char> {
        self.remaining().chars().next()
    }

    /// Move the cursor past the current character; a no-op at the end.
    fn advance(&mut self) {
        self.pos += self.current().map_or(0, char::len_utf8);
    }

    /// The not-yet-consumed tail of the input.
    fn remaining(&self) -> &str {
        &self.s[self.pos..]
    }
}
136
137/// Parse subscription flags like (r), (R), (k), (K), (i), (I), (w), (f), etc.
138/// Port from getarg() flag parsing section (lines 1389-1487)
139fn parse_subscript_flags(ctx: &mut GetArgContext) {
140    let c = match ctx.current() {
141        Some(c) => c,
142        None => return,
143    };
144
145    if c != '(' {
146        return;
147    }
148
149    ctx.advance(); // skip '('
150    let mut escapes = false;
151
152    loop {
153        let c = match ctx.current() {
154            Some(c) if c != ')' => c,
155            _ => break,
156        };
157
158        match c {
159            'r' => {
160                ctx.rev = true;
161                ctx.keymatch = false;
162                ctx.down = false;
163                ctx.ind = false;
164            }
165            'R' => {
166                ctx.rev = true;
167                ctx.down = true;
168                ctx.keymatch = false;
169                ctx.ind = false;
170            }
171            'k' => {
172                ctx.keymatch = ctx.is_hash;
173                ctx.rev = true;
174                ctx.down = false;
175                ctx.ind = false;
176            }
177            'K' => {
178                ctx.keymatch = ctx.is_hash;
179                ctx.rev = true;
180                ctx.down = true;
181                ctx.ind = false;
182            }
183            'i' => {
184                ctx.rev = true;
185                ctx.ind = true;
186                ctx.down = false;
187                ctx.keymatch = false;
188            }
189            'I' => {
190                ctx.rev = true;
191                ctx.ind = true;
192                ctx.down = true;
193                ctx.keymatch = false;
194            }
195            'w' => {
196                ctx.word = true;
197            }
198            'f' => {
199                ctx.word = true;
200                ctx.sep = Some("\n".to_string());
201            }
202            'e' => {
203                // quote_arg = 1 - handled differently in Rust
204            }
205            'n' => {
206                // Parse numeric argument: n:num:
207                ctx.advance();
208                if let Some(num) = parse_delimited_number(ctx) {
209                    ctx.num = if num == 0 { 1 } else { num };
210                }
211                continue;
212            }
213            'b' => {
214                // Parse beginning offset: b:num:
215                ctx.hasbeg = true;
216                ctx.advance();
217                if let Some(beg) = parse_delimited_number(ctx) {
218                    ctx.beg = if beg > 0 { beg - 1 } else { beg };
219                }
220                continue;
221            }
222            'p' => {
223                escapes = true;
224            }
225            's' => {
226                // Parse separator: s:sep:
227                ctx.advance();
228                if let Some(sep) = parse_delimited_string(ctx) {
229                    ctx.sep = Some(sep);
230                }
231                continue;
232            }
233            _ => {
234                // Unknown flag - reset and bail
235                ctx.num = 1;
236                ctx.word = false;
237                ctx.rev = false;
238                ctx.ind = false;
239                ctx.down = false;
240                ctx.keymatch = false;
241                ctx.sep = None;
242                return;
243            }
244        }
245        ctx.advance();
246    }
247
248    // Skip closing ')'
249    if ctx.current() == Some(')') {
250        ctx.advance();
251    }
252
253    if ctx.num < 0 {
254        ctx.down = !ctx.down;
255        ctx.num = -ctx.num;
256    }
257}
258
259/// Parse a delimited number like :123:
260fn parse_delimited_number(ctx: &mut GetArgContext) -> Option<i64> {
261    let c = ctx.current()?;
262    if c != ':' {
263        return None;
264    }
265    ctx.advance();
266
267    let start = ctx.pos;
268    while let Some(c) = ctx.current() {
269        if c == ':' {
270            break;
271        }
272        ctx.advance();
273    }
274
275    let num_str = &ctx.s[start..ctx.pos];
276
277    // Skip closing ':'
278    if ctx.current() == Some(':') {
279        ctx.advance();
280    }
281
282    num_str.parse().ok()
283}
284
285/// Parse a delimited string like :sep:
286fn parse_delimited_string(ctx: &mut GetArgContext) -> Option<String> {
287    let c = ctx.current()?;
288    if c != ':' {
289        return None;
290    }
291    ctx.advance();
292
293    let start = ctx.pos;
294    while let Some(c) = ctx.current() {
295        if c == ':' {
296            break;
297        }
298        ctx.advance();
299    }
300
301    let s = ctx.s[start..ctx.pos].to_string();
302
303    // Skip closing ':'
304    if ctx.current() == Some(':') {
305        ctx.advance();
306    }
307
308    Some(s)
309}
310
/// Find where the first subscript argument ends.
///
/// Port from the getarg() main parsing loop.
///
/// Scans `s` while tracking `[`/`]` and `(`/`)` nesting and returns the byte
/// offset of the first `]` or `,` that sits outside every bracket and
/// parenthesis. Returns `None` when no such character exists. A `)` without a
/// matching `(` is ignored.
fn find_subscript_end(s: &str) -> Option<usize> {
    let mut brackets: i32 = 0;
    let mut parens: i32 = 0;

    for (offset, ch) in s.char_indices() {
        if ch == '[' {
            brackets += 1;
        } else if ch == ']' {
            if brackets > 0 {
                brackets -= 1;
            } else if parens == 0 {
                return Some(offset);
            }
        } else if ch == '(' {
            parens += 1;
        } else if ch == ')' {
            if parens > 0 {
                parens -= 1;
            }
        } else if ch == ',' && brackets == 0 && parens == 0 {
            return Some(offset);
        }
    }

    None
}
334
/// Evaluate a subscript expression as an integer.
///
/// Port from the mathevalarg() call in getarg().
///
/// Surrounding whitespace is ignored. Only plain decimal integers are
/// understood so far; anything else (including arithmetic such as `1+2`)
/// evaluates to 0.
///
/// With `ksh_arrays`, a non-negative index is shifted up by one. The shift
/// saturates at `i64::MAX` instead of overflowing, which would panic in debug
/// builds and wrap to a negative index in release builds.
fn eval_subscript_expr(expr: &str, ksh_arrays: bool) -> i64 {
    match expr.trim().parse::<i64>() {
        Ok(n) if ksh_arrays && n >= 0 => n.saturating_add(1),
        Ok(n) => n,
        // Not a plain integer: arithmetic evaluation is not wired in yet.
        Err(_) => 0,
    }
}
353
354/// Parse array index subscript
355/// Port from getindex() in zsh/Src/params.c (lines 2001-2168)
356///
357/// Takes a subscript string like "1", "1,5", "@", "(r)pattern"
358/// Returns SubscriptValue with start/end positions
359pub fn getindex(
360    subscript: &str,
361    is_hash: bool,
362    ksh_arrays: bool,
363) -> Result<SubscriptValue, String> {
364    let s = subscript.trim();
365
366    // Handle @ and * for all elements (lines 2027-2032)
367    if s == "@" || s == "*" {
368        let mut v = SubscriptValue::all();
369        if s == "@" {
370            v.scan_flags |= scanflags::ISVAR_AT;
371        }
372        return Ok(v);
373    }
374
375    let mut ctx = GetArgContext::new(s, is_hash, ksh_arrays);
376
377    // Parse any subscription flags (lines 1389-1487)
378    parse_subscript_flags(&mut ctx);
379
380    let remaining = ctx.remaining();
381
382    // Find end of first argument (at comma or end)
383    let (first_arg, rest) = if let Some(comma_pos) = find_comma_position(remaining, is_hash) {
384        (&remaining[..comma_pos], Some(&remaining[comma_pos + 1..]))
385    } else {
386        (remaining, None)
387    };
388
389    // Evaluate first argument
390    let start = if ctx.rev {
391        // Reverse subscripting - pattern match
392        // For now, just parse as number if possible
393        eval_subscript_expr(first_arg.trim(), ksh_arrays)
394    } else {
395        eval_subscript_expr(first_arg.trim(), ksh_arrays)
396    };
397
398    // Handle range subscripts (lines 2107-2163)
399    let end = if let Some(rest) = rest {
400        // Has comma, get second argument (lines 2110-2114)
401        let end_expr = rest.trim();
402        eval_subscript_expr(end_expr, ksh_arrays)
403    } else {
404        // No comma - single element (line 2114)
405        start
406    };
407
408    let mut v = SubscriptValue::new();
409
410    if ctx.inv {
411        // Inverse indexing (lines 2040-2106)
412        v.val_flags |= valflags::INV;
413        v.start = start;
414        v.end = start + 1;
415    } else {
416        // Normal indexing (lines 2107-2163)
417        let has_comma = rest.is_some();
418
419        // Adjust start for 1-indexed to internal representation (line 2123-2124)
420        let adjusted_start = if start > 0 && !ksh_arrays {
421            start - 1
422        } else {
423            start
424        };
425
426        v.start = adjusted_start;
427        v.end = if has_comma { end } else { adjusted_start + 1 };
428    }
429
430    // Handle KSH_ARRAYS index adjustment (line 2091-2092)
431    if ksh_arrays && v.start > 0 {
432        v.start -= 1;
433    }
434
435    Ok(v)
436}
437
438/// Find comma position in subscript, respecting brackets
439fn find_comma_position(s: &str, is_hash: bool) -> Option<usize> {
440    let mut depth = 0;
441    let mut paren_depth = 0;
442
443    for (i, c) in s.char_indices() {
444        match c {
445            '[' => depth += 1,
446            ']' => {
447                if depth > 0 {
448                    depth -= 1;
449                }
450            }
451            '(' => paren_depth += 1,
452            ')' => {
453                if paren_depth > 0 {
454                    paren_depth -= 1;
455                }
456            }
457            ',' if depth == 0 && paren_depth == 0 && !is_hash => {
458                return Some(i);
459            }
460            _ => {}
461        }
462    }
463    None
464}
465
466/// Get array elements by subscript
467/// Port from array access logic in params.c
468pub fn get_array_by_subscript(arr: &[String], v: &SubscriptValue, ksh_arrays: bool) -> Vec<String> {
469    if v.is_all() {
470        return arr.to_vec();
471    }
472
473    let len = arr.len() as i64;
474
475    // Handle empty arrays
476    if len == 0 {
477        return Vec::new();
478    }
479
480    // Convert indices
481    let start_idx = normalize_index(v.start, len, ksh_arrays);
482    let end_idx = normalize_index(v.end, len, ksh_arrays);
483
484    // Clamp to valid range
485    let start = (start_idx.max(0) as usize).min(arr.len());
486    let end = (end_idx.max(0) as usize).min(arr.len());
487
488    if start >= end {
489        return Vec::new();
490    }
491
492    arr[start..end].to_vec()
493}
494
495/// Get single array element by subscript
496pub fn get_array_element_by_subscript(
497    arr: &[String],
498    v: &SubscriptValue,
499    ksh_arrays: bool,
500) -> Option<String> {
501    if v.is_all() || arr.is_empty() {
502        return None;
503    }
504
505    let len = arr.len() as i64;
506    let idx = normalize_index(v.start, len, ksh_arrays);
507
508    if idx < 0 || idx >= len {
509        return None;
510    }
511
512    arr.get(idx as usize).cloned()
513}
514
/// Resolve a possibly negative position against an array of length `len`.
///
/// A negative `idx` counts back from the end (`len + idx`, which can still be
/// negative when `idx` reaches before the first element). A non-negative
/// `idx` is returned unchanged in both indexing modes, because the caller has
/// already converted it to a 0-based position.
fn normalize_index(idx: i64, len: i64, ksh_arrays: bool) -> i64 {
    match (idx < 0, ksh_arrays) {
        // Counting from the end is the same in both modes.
        (true, _) => len + idx,
        // KSH_ARRAYS and zsh-style positions are both already 0-based here.
        (false, true) | (false, false) => idx,
    }
}
528
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an owned string array from literals.
    fn strings(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// Parse a subscript for a plain (non-hash) array with zsh-style indexing.
    fn parse(subscript: &str) -> SubscriptValue {
        getindex(subscript, false, false).unwrap()
    }

    #[test]
    fn test_simple_index() {
        let parsed = parse("1");
        assert_eq!(parsed.start, 0);
        assert_eq!(parsed.end, 1);
    }

    #[test]
    fn test_simple_index_ksh() {
        // KSH_ARRAYS: index 0 is the first element.
        let parsed = getindex("0", false, true).unwrap();
        assert_eq!(parsed.start, 0);
    }

    #[test]
    fn test_range_index() {
        let parsed = parse("1,3");
        assert_eq!(parsed.start, 0);
        assert_eq!(parsed.end, 3);
    }

    #[test]
    fn test_all_index() {
        let at = parse("@");
        assert!(at.is_all());
        assert_ne!(at.scan_flags & scanflags::ISVAR_AT, 0);

        let star = parse("*");
        assert!(star.is_all());
    }

    #[test]
    fn test_negative_index() {
        let parsed = parse("-1");
        assert_eq!(parsed.start, -1);
    }

    #[test]
    fn test_array_slice() {
        let arr = strings(&["a", "b", "c", "d"]);

        let head = get_array_by_subscript(&arr, &parse("1,2"), false);
        assert_eq!(head, vec!["a", "b"]);

        let tail = get_array_by_subscript(&arr, &parse("2,4"), false);
        assert_eq!(tail, vec!["b", "c", "d"]);
    }

    #[test]
    fn test_array_element() {
        let arr = strings(&["a", "b", "c"]);

        let first = get_array_element_by_subscript(&arr, &parse("1"), false);
        assert_eq!(first, Some("a".to_string()));

        let second = get_array_element_by_subscript(&arr, &parse("2"), false);
        assert_eq!(second, Some("b".to_string()));
    }
}