vorto 0.4.0

A terminal text editor with tree-sitter syntax highlighting and LSP support
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
//! Pure parsing — `KeyEvent` → `Token`, `&[Token]` → `Expr`.
//!
//! Two stages live here:
//!
//! 1. [`tokenize`] resolves a single `KeyEvent` to an `Option<Token>` in
//!    the current parse context, looking at the trailing tokens to decide
//!    whether the next key is a count, an operator's argument, a text
//!    object follower, etc.
//! 2. [`classify`] inspects the running token list and decides if it's a
//!    completed command ([`Parse::Complete`]), a valid prefix that should
//!    keep accumulating ([`Parse::Incomplete`]), or junk to drop
//!    ([`Parse::Invalid`]).
//!
//! Both are free functions of the token slice + `Keymap` — no `App`
//! borrow, no side effects. The evaluator in `super` consumes the
//! `Expr` they produce.

use crossterm::event::{KeyCode, KeyEvent, KeyModifiers};

use crate::action::{DirectKind, Expr, MotionExpr, MotionKind, Operator, Target, Token};
use crate::config::{GOTO_BINDINGS, KeySig, Keymap, OBJECT_BINDINGS, OP_PENDING_BINDINGS, Z_BINDINGS};
use crate::mode::Mode;

/// Result of [`classify`].
///
/// Tells the caller what to do with the token list accumulated so far.
#[derive(Debug)]
pub(in crate::app) enum Parse {
    /// The tokens form a finished command; carries the [`Expr`] built
    /// from them.
    Complete(Expr),
    /// The tokens are a valid prefix of some command — keep accumulating.
    Incomplete,
    /// The tokens are neither a command nor a prefix of one.
    Invalid,
}

/// Tokenization context — what the parser is "expecting" next, derived
/// from the trailing tokens of the current command (see [`context_of`]).
#[derive(Debug, Clone, Copy)]
enum ParseCtx {
    /// Top of a fresh command, or right after one or more Count tokens.
    Initial,
    /// Right after `<space>` — looking for a leader-bound action.
    LeaderPending,
    /// Right after an operator (or `<count><op>`). Now expecting
    /// a motion, a Scope marker, a Count, or the operator key itself
    /// again for the SelfDouble shortcut.
    OpPending,
    /// Right after a Scope marker (`i` / `a`). Expecting an object.
    ObjectExpected,
    /// Right after `g`. Expecting a follower from `GOTO_BINDINGS` —
    /// e.g. the second `g` for goto-file-start, or one of the
    /// `g`-prefixed motions / direct actions.
    GotoPending,
    /// Right after `f`/`F`/`t`/`T` (or `r`). Expecting the literal
    /// target/replacement character — the next character key becomes
    /// the argument. The emitted token depends on which prefix is on
    /// the stack (see [`char_arg_token`]).
    CharArgPending,
    /// Right after `z`. Expecting one of `z`/`t`/`b` for the viewport
    /// scroll-to family.
    ZPending,
}

/// Work out which tokenization context the next key falls into, based
/// on the tokens accumulated so far. Pure function of the token slice.
fn context_of(prev: &[Token]) -> ParseCtx {
    use Token::*;
    // Counts only scale a command; they never change which kind of
    // token is expected next. The context is therefore decided by the
    // newest token that is not a Count.
    let newest = prev.iter().rev().find(|tok| !matches!(tok, Count(_)));
    let Some(anchor) = newest else {
        // Empty list, or nothing but counts so far.
        return ParseCtx::Initial;
    };
    match anchor {
        LeaderPrefix => ParseCtx::LeaderPending,
        Op(_) => ParseCtx::OpPending,
        Scope(_) => ParseCtx::ObjectExpected,
        GotoPrefix => ParseCtx::GotoPending,
        FindCharPrefix { .. } | ReplaceCharPrefix => ParseCtx::CharArgPending,
        ZPrefix => ParseCtx::ZPending,
        // Motion/Direct/Object/SelfDouble already complete a command,
        // so tokenization shouldn't be happening after them; fall back
        // to the fresh-command context.
        _ => ParseCtx::Initial,
    }
}

/// Resolve a key to its token in the current parse context.
///
/// The context is derived from `prev` via [`context_of`]. Resolution
/// order:
///
/// 1. `Ctrl-r` is always redo.
/// 2. Outside of a pending character argument, ASCII digits are handled
///    as a parser primitive (counts, or the `0` line-start motion).
/// 3. Everything else is looked up in the table for the context.
///
/// Returns `None` when the key has no meaning in the current context —
/// the caller should treat this as a parse abort (clear the token
/// list). Only called for Normal mode.
pub(in crate::app) fn tokenize(
    km: &Keymap,
    prev: &[Token],
    mode: Mode,
    key: KeyEvent,
) -> Option<Token> {
    debug_assert_eq!(mode, Mode::Normal);

    // Ctrl-r is redo (vim convention). Works in any context.
    if key.modifiers.contains(KeyModifiers::CONTROL) && key.code == KeyCode::Char('r') {
        return Some(Token::Direct(DirectKind::Redo));
    }

    let ctx = context_of(prev);
    let code = key.code;

    // Digit handling stays special: count parsing is a parser
    // primitive, not a user-rebindable shortcut.
    //
    // It must NOT run while a literal character argument is pending:
    // after `f`/`F`/`t`/`T`/`r` a digit is just the character to
    // find/replace (`f5`, `r0`), so it has to reach `char_arg_token`
    // instead of being rejected here.
    let digit = if matches!(ctx, ParseCtx::CharArgPending) {
        None
    } else {
        ascii_digit(code)
    };
    if let Some(c) = digit {
        let already_counting = matches!(prev.last(), Some(Token::Count(_)));
        let d = c
            .to_digit(10)
            .expect("ascii_digit only yields '0'..='9'");
        return match (ctx, c, already_counting) {
            // 0 with no running count is the line-start motion, both on
            // its own (`0`) and as an operator target (`d0`).
            (ParseCtx::Initial | ParseCtx::OpPending, '0', false) => {
                Some(Token::Motion(MotionKind::LineStart))
            }
            // 0 inside a running count extends it.
            (_, '0', true) => Some(Token::Count(0)),
            // 1-9 always starts/extends a count (Initial or OpPending).
            (ParseCtx::Initial | ParseCtx::OpPending, '1'..='9', _) => Some(Token::Count(d)),
            // In the remaining contexts (leader, object, goto, z)
            // digits don't make sense.
            _ => None,
        };
    }

    let sig = KeySig::from_event(key);
    match ctx {
        ParseCtx::Initial => km.initial.get(&sig).copied(),
        ParseCtx::LeaderPending => km.leader.get(&sig).copied(),
        ParseCtx::OpPending => op_pending_token(code, prev),
        ParseCtx::ObjectExpected => object_token(code),
        ParseCtx::GotoPending => goto_pending_token(code),
        ParseCtx::CharArgPending => char_arg_token(code, prev),
        ParseCtx::ZPending => z_pending_token(code),
    }
}

/// Build the token for the literal character that follows a pending
/// `f`/`F`/`t`/`T` or `r`.
///
/// The most recent pending prefix in `prev` decides the result: a
/// `FindCharPrefix` yields a `FindChar` motion carrying the prefix's
/// `forward`/`till` flags, a `ReplaceCharPrefix` yields a `ReplaceChar`
/// direct. Returns `None` when the key is not a character key or when
/// no such prefix is on the stack.
fn char_arg_token(code: KeyCode, prev: &[Token]) -> Option<Token> {
    // Escape/arrow/etc abort the pending arg — returning None makes the
    // caller clear the token stack.
    let ch = match code {
        KeyCode::Char(ch) => ch,
        _ => return None,
    };
    // Walk newest-to-oldest and convert the first pending prefix found.
    prev.iter().rev().find_map(|tok| match tok {
        Token::FindCharPrefix { forward, till } => Some(Token::Motion(MotionKind::FindChar {
            ch,
            forward: *forward,
            till: *till,
        })),
        Token::ReplaceCharPrefix => Some(Token::Direct(DirectKind::ReplaceChar { ch })),
        _ => None,
    })
}

/// Look up the follower key of a pending `z` in `Z_BINDINGS`; the first
/// binding that matches `code` supplies the token.
fn z_pending_token(code: KeyCode) -> Option<Token> {
    Z_BINDINGS
        .iter()
        .find_map(|binding| binding.matches(code).then_some(binding.token))
}

/// Look up the follower key of a pending `g` in `GOTO_BINDINGS`; the
/// first binding that matches `code` supplies the token.
fn goto_pending_token(code: KeyCode) -> Option<Token> {
    GOTO_BINDINGS
        .iter()
        .find_map(|binding| binding.matches(code).then_some(binding.token))
}

/// Resolve a key pressed while an operator is pending.
///
/// Pressing the operator's own key again yields `SelfDouble` (dd, yy,
/// cc, >>, <<); any other key is looked up in `OP_PENDING_BINDINGS`.
/// Returns `None` when `prev` holds no operator or the key is unbound.
fn op_pending_token(code: KeyCode, prev: &[Token]) -> Option<Token> {
    // The operator being followed is the newest Op on the stack.
    let active = prev.iter().rev().find_map(|tok| {
        if let Token::Op(op) = tok {
            Some(*op)
        } else {
            None
        }
    })?;

    // The doubling key depends on the active operator, not on a static
    // table, so this check stays inline.
    if let KeyCode::Char(ch) = code {
        match (active, ch) {
            (Operator::Delete, 'd')
            | (Operator::Yank, 'y')
            | (Operator::Change, 'c')
            | (Operator::Indent, '>')
            | (Operator::Dedent, '<') => return Some(Token::SelfDouble(active)),
            _ => {}
        }
    }

    OP_PENDING_BINDINGS
        .iter()
        .find_map(|binding| binding.matches(code).then_some(binding.token))
}

/// Return the character of `code` when it is a character key holding an
/// ASCII digit (`0`-`9`); `None` for every other key.
fn ascii_digit(code: KeyCode) -> Option<char> {
    let KeyCode::Char(c) = code else {
        return None;
    };
    c.is_ascii_digit().then_some(c)
}

/// Look up the key following a Scope marker in `OBJECT_BINDINGS`; the
/// first binding that matches `code` supplies the token.
fn object_token(code: KeyCode) -> Option<Token> {
    OBJECT_BINDINGS
        .iter()
        .find_map(|binding| binding.matches(code).then_some(binding.token))
}

// ────────────────────────────────────────────────────────────────────────
// Count helpers
// ────────────────────────────────────────────────────────────────────────

/// Split the leading run of `Count(_)` tokens off `tokens` and fold the
/// digits into a single decimal number.
///
/// Returns `(count, remainder)`. With no leading counts the count
/// defaults to `1` and the slice is returned untouched; otherwise the
/// combined value saturates instead of overflowing and is clamped to
/// at least `1`.
fn take_count(tokens: &[Token]) -> (u32, &[Token]) {
    let digits = tokens
        .iter()
        .take_while(|tok| matches!(tok, Token::Count(_)))
        .count();
    if digits == 0 {
        return (1, tokens);
    }

    let (head, tail) = tokens.split_at(digits);
    let combined = head.iter().fold(0u32, |acc, tok| match tok {
        Token::Count(d) => acc.saturating_mul(10).saturating_add(*d),
        // `head` holds only Count tokens, so this arm is never taken.
        _ => acc,
    });
    (combined.max(1), tail)
}

// ────────────────────────────────────────────────────────────────────────
// classify + build_expr
// ────────────────────────────────────────────────────────────────────────

/// Interpret the current token list.
///
/// Yields [`Parse::Complete`] with the built `Expr` when the list is a
/// finished command, [`Parse::Incomplete`] when it is a valid prefix of
/// one, and [`Parse::Invalid`] otherwise.
pub(in crate::app) fn classify(tokens: &[Token]) -> Parse {
    match build_expr(tokens) {
        Some(expr) => Parse::Complete(expr),
        None if is_valid_prefix(tokens) => Parse::Incomplete,
        None => Parse::Invalid,
    }
}

/// Try to turn a complete token list into an `Expr`.
///
/// Leading counts are folded into one number first. Prefix tokens
/// (`<space>`, `g`, `z`, `f`/`t`, `r`) only shape the parse and are
/// dropped from the resulting expression. Returns `None` when the
/// tokens don't form a finished command.
fn build_expr(tokens: &[Token]) -> Option<Expr> {
    use Token::*;
    let (count, body) = take_count(tokens);

    let expr = match body {
        // A direct action: standalone, or behind a leader / goto / z /
        // replace-char prefix (`<space>f`, `gd`, `zz`, `r<c>`). The
        // count is usually meaningless here but is carried for parity.
        [Direct(kind)]
        | [LeaderPrefix, Direct(kind)]
        | [GotoPrefix, Direct(kind)]
        | [ZPrefix, Direct(kind)]
        | [ReplaceCharPrefix, Direct(kind)] => Expr::Direct { kind: *kind, count },

        // A motion: standalone, after a find-char prefix (`f<c>`,
        // `t<c>`), or after `g` (`g_`, `ge`, ...).
        [Motion(motion)]
        | [FindCharPrefix { .. }, Motion(motion)]
        | [GotoPrefix, Motion(motion)] => Expr::Motion(MotionExpr {
            motion: *motion,
            count,
        }),

        // gg → file start (with optional count: 5gg = goto line 5)
        [GotoPrefix, GotoPrefix] => Expr::Motion(MotionExpr {
            motion: MotionKind::FileStart,
            count,
        }),

        // Operator followed by its target.
        [Op(op), after_op @ ..] => return build_op_expr(*op, after_op, count),

        _ => return None,
    };
    Some(expr)
}

/// Build the `Expr::Op` for operator `op` from the tokens that follow
/// it.
///
/// `outer_count` is the count typed before the operator; any count
/// typed after it is peeled off `after_op` here. Returns `None` when
/// the remaining tokens are not a complete operator target.
fn build_op_expr(op: Operator, after_op: &[Token], outer_count: u32) -> Option<Expr> {
    use Token::*;
    let (inner_count, body) = take_count(after_op);
    // For targets with no count of their own, both counts multiply.
    let combined = outer_count.saturating_mul(inner_count);

    let (target, outer_count) = match body {
        // dd / yy / cc
        [SelfDouble(_)] => (Target::LineWise, combined),

        // dw / 3dw / d3w / 3d2w, `df<c>` / `2dt<c>`, `dg_` / `dge` —
        // motion targets. The find-char and goto prefixes only shape
        // the parse and are dropped from the AST.
        [Motion(motion)]
        | [FindCharPrefix { .. }, Motion(motion)]
        | [GotoPrefix, Motion(motion)] => (
            Target::Motion(MotionExpr {
                motion: *motion,
                count: inner_count,
            }),
            outer_count,
        ),

        // `cgn` / `dgn` / `ygn` (and the `gN` variants). Not a
        // `Target::Motion` because the range starts at the match rather
        // than at the cursor; uses the dedicated `SearchMatch` target.
        [GotoPrefix, Direct(DirectKind::SearchSelectNext { reverse })] => {
            (Target::SearchMatch { reverse: *reverse }, combined)
        }

        // dib / di" — text objects. A count other than 1 on a text
        // object isn't supported yet.
        [Scope(scope), Object(object)] if inner_count == 1 => (
            Target::TextObject {
                scope: *scope,
                object: *object,
            },
            outer_count,
        ),

        _ => return None,
    };

    Some(Expr::Op {
        op,
        target,
        outer_count,
    })
}

/// True if the token slice is the prefix of some buildable command.
/// Used to decide between Incomplete (keep accumulating) and Invalid
/// (clear and beep).
///
/// Leading counts are transparent. After an operator, an optional
/// inner count may be followed by nothing yet, a Scope marker, a
/// find-char prefix, or a goto prefix — the same target shapes that
/// `build_op_expr` can complete.
fn is_valid_prefix(tokens: &[Token]) -> bool {
    use Token::*;
    // Strip leading counts — they're transparent to validity.
    let (_, rest) = take_count(tokens);
    match rest {
        // Just counts so far.
        [] => true,
        // <space> waiting for its follower.
        [LeaderPrefix] => true,
        // g waiting for its follower (second g, or a g-bound key).
        [GotoPrefix] => true,
        // z waiting for z/t/b.
        [ZPrefix] => true,
        // f/F/t/T waiting for the literal char.
        [FindCharPrefix { .. }] => true,
        // r waiting for the replacement char.
        [ReplaceCharPrefix] => true,
        // Operator, optionally followed by an inner count (`d`, `d2`).
        [Op(_), after_op @ ..] => {
            let (_, pending) = take_count(after_op);
            // With or without an inner count, the continuations that
            // can still be extended are: nothing yet, Scope (heading
            // for a text object), FindCharPrefix (`df<c>` / `d2f<c>`)
            // and GotoPrefix (`dge` / `d2ge` / `dgn`). GotoPrefix must
            // be accepted after a count too, otherwise `d2g` would be
            // dropped before its follower could complete it.
            matches!(
                pending,
                [] | [Scope(_)] | [FindCharPrefix { .. }] | [GotoPrefix]
            )
        }
        _ => false,
    }
}