ad_editor/exec/
mod.rs

1//! Sam style language for running edit commands using structural regular expressions
2use crate::{
3    buffer::{Buffer, GapBuffer},
4    dot::{Cur, Dot},
5    editor::Action,
6    regex::{self, Match},
7};
8use ad_event::Source;
9use std::{cmp::min, io::Write, iter::Peekable, str::Chars};
10
11mod addr;
12mod cached_stdin;
13mod char_iter;
14mod expr;
15
16use addr::ParseError;
17pub(crate) use addr::{Addr, AddrBase, Address};
18pub use cached_stdin::CachedStdin;
19pub(crate) use char_iter::IterBoundedChars;
20use expr::{Expr, ParseOutput};
21
/// Variable usable in templates for injecting the current filename.
/// (Following the naming convention used in Awk)
///
/// Occurrences of this token in a template are replaced with the `fname` argument that was
/// passed in when the program is executed.
const FNAME_VAR: &str = "$FILENAME";
25
/// Errors that can be returned by the exec engine
///
/// These cover both failures to parse a program and failures encountered while running one.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Error {
    /// Empty expression group
    EmptyExpressionGroup,
    /// Empty branch for an expression group
    EmptyExpressionGroupBranch,
    /// Empty program: the program text was blank, or a (sub-)program with no expressions was
    /// validated
    EmptyProgram,
    /// Unexpected end of file. When parsing a full program this is treated as the end of the
    /// input rather than being reported to the caller.
    Eof,
    /// Invalid regex
    InvalidRegex(regex::Error),
    /// Invalid substitution: the wrapped value is the index of a submatch that a template
    /// referenced (`$0` to `$9`) but that was not available from the match being templated
    InvalidSubstitution(usize),
    /// Invalid suffix
    InvalidSuffix,
    /// Missing action: the program (or a branch of a group) does not end with an action
    MissingAction,
    /// Missing delimiter
    // NOTE(review): the &'static str presumably names the construct being parsed -- confirm
    // against the expr parser
    MissingDelimiter(&'static str),
    /// Unclosed delimiter: a description of what was being parsed along with the delimiter
    /// character that was expected (e.g. `("dot expr regex", '/')`)
    UnclosedDelimiter(&'static str, char),
    /// Unclosed expression group
    UnclosedExpressionGroup,
    /// Unclosed expression group branch
    UnclosedExpressionGroupBranch,
    /// Unexpected character
    UnexpectedCharacter(char),
}
56
57impl From<regex::Error> for Error {
58    fn from(err: regex::Error) -> Self {
59        Error::InvalidRegex(err)
60    }
61}
62
63/// Something that can be edited by a Program
64pub trait Edit: Address {
65    /// Extract the content of a previous submatch so it can be used in templating
66    fn submatch(&self, m: &Match, n: usize) -> Option<String> {
67        let (from, to) = m.sub_loc(n)?;
68        Some(self.iter_between(from, to).map(|(_, ch)| ch).collect())
69    }
70
71    /// Insert a string at the specified index
72    fn insert(&mut self, ix: usize, s: &str);
73
74    /// Remove all characters from (from..to)
75    fn remove(&mut self, from: usize, to: usize);
76
77    /// Mark the start of an edit transaction
78    fn begin_edit_transaction(&mut self) {}
79
80    /// Mark the end of an edit transaction
81    fn end_edit_transaction(&mut self) {}
82}
83
/// Editing a bare GapBuffer delegates directly to the buffer's own insert / remove methods.
/// The default (no-op) transaction hooks from [`Edit`] are used.
impl Edit for GapBuffer {
    /// Insert `s` at character index `idx` using `GapBuffer::insert_str`
    fn insert(&mut self, idx: usize, s: &str) {
        self.insert_str(idx, s)
    }

    /// Remove the range `from..to` using `GapBuffer::remove_range`
    fn remove(&mut self, from: usize, to: usize) {
        self.remove_range(from, to);
    }
}
93
94impl Edit for Buffer {
95    fn insert(&mut self, idx: usize, s: &str) {
96        self.dot = Dot::Cur { c: Cur { idx } };
97        self.handle_action(Action::InsertString { s: s.to_string() }, Source::Fsys);
98    }
99
100    fn remove(&mut self, from: usize, to: usize) {
101        if from == to {
102            return;
103        }
104        self.dot = Dot::from_char_indices(from, to.saturating_sub(1)).collapse_null_range();
105        self.handle_action(Action::Delete, Source::Fsys);
106    }
107
108    fn begin_edit_transaction(&mut self) {
109        self.new_edit_log_transaction()
110    }
111
112    fn end_edit_transaction(&mut self) {
113        self.new_edit_log_transaction()
114    }
115}
116
/// A parsed and compiled program that can be executed against an input
///
/// Programs are created using [`Program::try_parse`] and run using [`Program::execute`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Program {
    /// The address that is resolved against the input at the start of execution in order to
    /// determine the dot that the expressions are run over.
    initial_dot: Addr,
    /// The expressions making up the program, in the order they were parsed.
    exprs: Vec<Expr>,
}
123
124impl Program {
125    /// Attempt to parse a given program input
126    pub fn try_parse(s: &str) -> Result<Self, Error> {
127        let mut exprs = vec![];
128        let mut it = s.trim().chars().peekable();
129
130        if it.peek().is_none() {
131            return Err(Error::EmptyProgram);
132        }
133
134        let initial_dot = match Addr::parse(&mut it) {
135            Ok(dot_expr) => dot_expr,
136
137            // If the start of input is not an address we default to Full and attempt to parse the
138            // rest of the program. We need to reconstruct the iterator here as we may have
139            // advanced through the string while we attempt to parse the initial address.
140            Err(ParseError::NotAnAddress) => {
141                it = s.trim().chars().peekable();
142                Addr::full()
143            }
144
145            Err(ParseError::InvalidRegex(e)) => return Err(Error::InvalidRegex(e)),
146            Err(ParseError::UnclosedDelimiter) => {
147                return Err(Error::UnclosedDelimiter("dot expr regex", '/'))
148            }
149            Err(ParseError::UnexpectedCharacter(c)) => return Err(Error::UnexpectedCharacter(c)),
150            Err(ParseError::InvalidSuffix) => return Err(Error::InvalidSuffix),
151        };
152
153        consume_whitespace(&mut it);
154
155        loop {
156            if it.peek().is_none() {
157                break;
158            }
159
160            match Expr::try_parse(&mut it) {
161                Ok(ParseOutput::Single(expr)) => {
162                    exprs.push(expr);
163                    consume_whitespace(&mut it);
164                }
165                Ok(ParseOutput::Pair(e1, e2)) => {
166                    exprs.extend([e1, e2]);
167                    consume_whitespace(&mut it);
168                }
169                Err(Error::Eof) => break,
170                Err(e) => return Err(e),
171            }
172        }
173
174        if exprs.is_empty() {
175            return Ok(Self { initial_dot, exprs });
176        }
177
178        validate(&exprs)?;
179
180        Ok(Self { initial_dot, exprs })
181    }
182
183    /// Execute this program against a given Edit
184    pub fn execute<E, W>(&mut self, ed: &mut E, fname: &str, out: &mut W) -> Result<Dot, Error>
185    where
186        E: Edit,
187        W: Write,
188    {
189        let initial_dot = ed.map_addr(&mut self.initial_dot);
190
191        if self.exprs.is_empty() {
192            return Ok(initial_dot);
193        }
194
195        let (from, to) = initial_dot.as_char_indices();
196        let initial = &Match::synthetic(from, to.saturating_add(1));
197
198        ed.begin_edit_transaction();
199        let (from, to) = self.step(ed, initial, 0, fname, out)?.as_char_indices();
200        ed.end_edit_transaction();
201
202        // In the case of running against a lazy stream our initial `to` will be a sential value of
203        // usize::MAX which needs to be clamped to the size of the input. For Buffers and GapBuffers
204        // where we know that we should already be in bounds this is not required but the overhead
205        // of always doing it is minimal as checking the number of chars in the buffer is O(1) due
206        // to us caching the value.
207        let ix_max = ed.len_chars();
208
209        Ok(Dot::from_char_indices(min(from, ix_max), min(to, ix_max)))
210    }
211
212    fn step<E, W>(
213        &mut self,
214        ed: &mut E,
215        m: &Match,
216        pc: usize,
217        fname: &str,
218        out: &mut W,
219    ) -> Result<Dot, Error>
220    where
221        E: Edit,
222        W: Write,
223    {
224        let (mut from, to) = m.loc();
225
226        match self.exprs[pc].clone() {
227            Expr::Group(g) => {
228                let mut dot = Dot::from_char_indices(from, to);
229                for exprs in g {
230                    let mut p = Program {
231                        initial_dot: Addr::Explicit(dot),
232                        exprs: exprs.clone(),
233                    };
234                    dot = p.step(ed, m, 0, fname, out)?;
235                }
236
237                Ok(dot)
238            }
239
240            Expr::LoopMatches(mut re) => {
241                let mut initial_matches = Vec::new();
242                while let Some(m) = re.match_iter(&mut ed.iter_between(from, to), from) {
243                    // It's possible for the Regex we're using to match a 0-length string which
244                    // would cause us to get stuck trying to advance to the next match position.
245                    // If this happens we advance from by a character to ensure that we search
246                    // further in the input.
247                    let mut new_from = m.loc().1;
248                    if new_from == from {
249                        new_from += 1;
250                    }
251                    from = new_from;
252
253                    initial_matches.push(m);
254
255                    if from >= to || from >= ed.max_iter() {
256                        break;
257                    }
258                }
259
260                self.apply_matches(initial_matches, ed, m, pc, fname, out)
261            }
262
263            Expr::LoopBetweenMatches(mut re) => {
264                let mut initial_matches = Vec::new();
265
266                while let Some(m) = re.match_iter(&mut ed.iter_between(from, to), from) {
267                    let (new_from, new_to) = m.loc();
268                    if from < new_from {
269                        initial_matches.push(Match::synthetic(from, new_from));
270                    }
271                    from = new_to;
272                    if from > to || from >= ed.max_iter() {
273                        break;
274                    }
275                }
276
277                if from < to {
278                    initial_matches.push(Match::synthetic(from, to));
279                }
280
281                self.apply_matches(initial_matches, ed, m, pc, fname, out)
282            }
283
284            Expr::IfContains(mut re) => {
285                if re.matches_iter(&mut ed.iter_between(from, to), from) {
286                    self.step(ed, m, pc + 1, fname, out)
287                } else {
288                    Ok(Dot::from_char_indices(from, to))
289                }
290            }
291
292            Expr::IfNotContains(mut re) => {
293                if !re.matches_iter(&mut ed.iter_between(from, to), from) {
294                    self.step(ed, m, pc + 1, fname, out)
295                } else {
296                    Ok(Dot::from_char_indices(from, to))
297                }
298            }
299
300            Expr::Print(pat) => {
301                let s = template_match(&pat, m, ed, fname)?;
302                write!(out, "{s}").expect("to be able to write");
303                Ok(Dot::from_char_indices(from, to))
304            }
305
306            Expr::Insert(pat) => {
307                let s = template_match(&pat, m, ed, fname)?;
308                ed.insert(from, &s);
309                Ok(Dot::from_char_indices(from, to + s.chars().count()))
310            }
311
312            Expr::Append(pat) => {
313                let s = template_match(&pat, m, ed, fname)?;
314                ed.insert(to, &s);
315                Ok(Dot::from_char_indices(from, to + s.chars().count()))
316            }
317
318            Expr::Change(pat) => {
319                let s = template_match(&pat, m, ed, fname)?;
320                ed.remove(from, to);
321                ed.insert(from, &s);
322                Ok(Dot::from_char_indices(from, from + s.chars().count()))
323            }
324
325            Expr::Delete => {
326                ed.remove(from, to);
327                Ok(Dot::from_char_indices(from, from))
328            }
329
330            Expr::Sub(mut re, pat) => match re.match_iter(&mut ed.iter_between(from, to), from) {
331                Some(m) => {
332                    let (mfrom, mto) = m.loc();
333                    let s = template_match(&pat, &m, ed, fname)?;
334                    ed.remove(mfrom, mto);
335                    ed.insert(mfrom, &s);
336                    Ok(Dot::from_char_indices(
337                        from,
338                        to - (mto - mfrom) + s.chars().count(),
339                    ))
340                }
341                None => Ok(Dot::from_char_indices(from, to)),
342            },
343        }
344    }
345
346    /// When looping over disjoint matches in the input we need to determine all of the initial
347    /// match points before we start making any edits as the edits may alter the semantics of
348    /// future matches.
349    fn apply_matches<E, W>(
350        &mut self,
351        initial_matches: Vec<Match>,
352        ed: &mut E,
353        m: &Match,
354        pc: usize,
355        fname: &str,
356        out: &mut W,
357    ) -> Result<Dot, Error>
358    where
359        E: Edit,
360        W: Write,
361    {
362        let mut offset: isize = 0;
363        let (from, to) = m.loc();
364        let mut dot = Dot::from_char_indices(from, to);
365
366        for mut m in initial_matches.into_iter() {
367            m.apply_offset(offset);
368
369            let cur_len = ed.len_chars();
370            dot = self.step(ed, &m, pc + 1, fname, out)?;
371            let new_len = ed.len_chars();
372            offset += new_len as isize - cur_len as isize;
373        }
374
375        Ok(dot)
376    }
377}
378
/// Advance `it` past any leading whitespace, stopping at the first non-whitespace character
/// (which is left in place) or at the end of the input.
fn consume_whitespace(it: &mut Peekable<Chars<'_>>) {
    while it.next_if(|ch| ch.is_whitespace()).is_some() {}
}
389
390fn validate(exprs: &[Expr]) -> Result<(), Error> {
391    use Expr::*;
392
393    if exprs.is_empty() {
394        return Err(Error::EmptyProgram);
395    }
396
397    // Groups branches must be valid sub-programs
398    for e in exprs.iter() {
399        if let Group(branches) = e {
400            for branch in branches.iter() {
401                validate(branch)?;
402            }
403        }
404    }
405
406    // Must end with an action
407    if !matches!(
408        exprs[exprs.len() - 1],
409        Group(_) | Insert(_) | Append(_) | Change(_) | Sub(_, _) | Print(_) | Delete
410    ) {
411        return Err(Error::MissingAction);
412    }
413
414    Ok(())
415}
416
417// FIXME: if a previous sub-match replacement injects a valid var name for a subsequent one
418// then we end up attempting to template THAT in a later iteration of the loop.
419fn template_match<E>(s: &str, m: &Match, ed: &E, fname: &str) -> Result<String, Error>
420where
421    E: Edit,
422{
423    let mut output = if s.contains(FNAME_VAR) {
424        s.replace(FNAME_VAR, fname)
425    } else {
426        s.to_string()
427    };
428
429    // replace newline and tab escapes with their literal equivalents
430    output = output.replace("\\n", "\n").replace("\\t", "\t");
431
432    let vars = ["$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9"];
433    for (n, var) in vars.iter().enumerate() {
434        if !s.contains(var) {
435            continue;
436        }
437        match ed.submatch(m, n) {
438            Some(sm) => output = output.replace(var, &sm.to_string()),
439            None => return Err(Error::InvalidSubstitution(n)),
440        }
441    }
442
443    Ok(output)
444}
445
// Unit tests covering parsing of programs, the behaviour of individual expressions and the
// result of executing full programs against a Buffer.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{buffer::Buffer, editor::Action, regex::Regex};
    use simple_test_case::test_case;
    use Expr::*;

    // Compile a regex that is known to be valid
    fn re(s: &str) -> Regex {
        Regex::compile(s).unwrap()
    }

    // Each program starts with "," so the parsed initial dot is expected to be Addr::full()
    #[test_case(", p/$0/", vec![Print("$0".to_string())]; "print all")]
    #[test_case(", x/^.*$/ s/foo/bar/", vec![LoopMatches(re("^.*$")), Sub(re("foo"), "bar".to_string())]; "simple loop")]
    #[test_case(", x/^.*$/ g/emacs/ d", vec![LoopMatches(re("^.*$")), IfContains(re("emacs")), Delete]; "loop filter")]
    #[test]
    fn parse_program_works(s: &str, expected: Vec<Expr>) {
        let p = Program::try_parse(s).expect("valid input");
        assert_eq!(
            p,
            Program {
                initial_dot: Addr::full(),
                exprs: expected
            }
        );
    }

    #[test_case("", Error::EmptyProgram; "empty program")]
    #[test_case(", x/.*/", Error::MissingAction; "missing action")]
    #[test]
    fn parse_program_errors_correctly(s: &str, expected: Error) {
        let res = Program::try_parse(s);
        assert_eq!(res, Err(expected));
    }

    // Runs `step` directly (bypassing `execute`) against "foo foo foo" with a match covering
    // the full input and checks both the resulting text and the returned dot.
    #[test_case(vec![Insert("X".to_string())], "Xfoo foo foo", (0, 12); "insert")]
    #[test_case(vec![Append("X".to_string())], "foo foo fooX", (0, 12); "append")]
    #[test_case(vec![Change("X".to_string())], "X", (0, 1); "change")]
    #[test_case(vec![Delete], "", (0, 0); "delete")]
    #[test_case(vec![Sub(re("oo"), "X".to_string())], "fX foo foo", (0, 10); "sub single")]
    #[test_case(vec![LoopMatches(re("foo")), Delete], "  ", (2, 2); "loop delete")]
    #[test_case(vec![LoopBetweenMatches(re("foo")), Delete], "foofoofoo", (6, 6); "loop between delete")]
    #[test_case(vec![LoopMatches(re("foo")), Append("X".to_string())], "fooX fooX fooX", (10, 14); "loop change")]
    #[test_case(vec![LoopBetweenMatches(re("foo")), Append("X".to_string())], "foo Xfoo Xfoo", (8, 10); "loop between change")]
    #[test]
    fn step_works(exprs: Vec<Expr>, expected: &str, expected_dot: (usize, usize)) {
        let mut prog = Program {
            initial_dot: Addr::full(),
            exprs,
        };
        let mut b = Buffer::new_unnamed(0, "foo foo foo");
        let dot = prog
            .step(&mut b, &Match::synthetic(0, 11), 0, "test", &mut vec![])
            .unwrap();

        assert_eq!(&b.txt.to_string(), expected);
        assert_eq!(dot.as_char_indices(), expected_dot);
    }

    // $1 in a template should be replaced with the first capture group of each loop match
    #[test_case(", x/(t.)/ c/$1X/", "thXis is a teXst XstrXing"; "x c")]
    #[test_case(", x/(t.)/ i/$1/", "ththis is a tetest t strtring"; "x i")]
    #[test_case(", x/(t.)/ a/$1/", "ththis is a tetest t strtring"; "x a")]
    #[test]
    fn substitution_of_submatches_works(s: &str, expected: &str) {
        let mut prog = Program::try_parse(s).unwrap();

        let mut b = Buffer::new_unnamed(0, "this is a test string");
        prog.execute(&mut b, "test", &mut vec![]).unwrap();
        assert_eq!(&b.txt.to_string(), expected);
    }

    // Printing each block between matches of " " should give one line of output per word,
    // with the final dot covering the last block.
    #[test]
    fn loop_between_generates_the_correct_blocks() {
        let mut prog = Program::try_parse(", y/ / p/>$0<\n/").unwrap();
        let mut b = Buffer::new_unnamed(0, "this and that");
        let mut output = Vec::new();
        let dot = prog.execute(&mut b, "test", &mut output).unwrap();

        let s = String::from_utf8(output).unwrap();
        assert_eq!(s, ">this<\n>and<\n>that<\n");

        let dot_content = dot.content(&b);
        assert_eq!(dot_content, "that");
    }

    // End to end: parse and execute against "foo│foo│foo" with the cursor placed at `idx`.
    // The separator is a multi-byte character.
    #[test_case(0, "/oo.fo/ d", "fo│foo"; "regex dot delete")]
    #[test_case(2, "-/f/,/f/ d", "oo│foo"; "regex dot range delete")]
    #[test_case(0, ", x/foo/ p/$0/", "foo│foo│foo"; "x print")]
    #[test_case(0, ", x/foo/ i/X/", "Xfoo│Xfoo│Xfoo"; "x insert")]
    #[test_case(0, ", x/foo/ a/X/", "fooX│fooX│fooX"; "x append")]
    #[test_case(0, ", x/foo/ c/X/", "X│X│X"; "x change")]
    #[test_case(0, ", x/foo/ c/XX/", "XX│XX│XX"; "x change 2")]
    #[test_case(0, ", x/foo/ d", "││"; "x delete")]
    #[test_case(0, ", x/foo/ s/o/X/", "fXo│fXo│fXo"; "x substitute")]
    #[test_case(0, ", y/foo/ p/>$0</", "foo│foo│foo"; "y print")]
    #[test_case(0, ", y/foo/ i/X/", "fooX│fooX│fooX"; "y insert")]
    #[test_case(0, ", y/foo/ a/X/", "foo│Xfoo│XfooX"; "y append")]
    #[test_case(0, ", y/foo/ c/X/", "fooXfooXfooX"; "y change")]
    #[test_case(0, ", y/foo/ d", "foofoofoo"; "y delete")]
    #[test_case(0, ", y/│/ d", "││"; "y delete 2")]
    #[test_case(0, ", s/oo/X/", "fX│foo│foo"; "sub single")]
    #[test_case(0, ", s/\\w+/X/", "X│foo│foo"; "sub word single")]
    #[test_case(0, ", s/oo/X/g", "fX│fX│fX"; "sub all")]
    #[test_case(0, ", s/.*/X/g", "X"; "sub all dot star")]
    #[test_case(0, ", x/\\b\\w+\\b/ c/X/", "X│X│X"; "change each word")]
    #[test_case(0, ", x/foo/ s/o/X/g", "fXX│fXX│fXX"; "nested loop x substitute all")]
    #[test_case(0, ", x/oo/ s/.*/X/g", "fX│fX│fX"; "nested loop x sub all dot star")]
    #[test]
    fn execute_produces_the_correct_string(idx: usize, s: &str, expected: &str) {
        let mut prog = Program::try_parse(s).unwrap();
        let mut b = Buffer::new_unnamed(0, "foo│foo│foo");
        b.dot = Cur::new(idx).into();
        prog.execute(&mut b, "test", &mut vec![]).unwrap();

        assert_eq!(&b.txt.to_string(), expected, "buffer");
    }

    #[test]
    fn multiline_file_dot_star_works() {
        let mut prog = Program::try_parse(", x/.*/ c/foo/").unwrap();
        let mut b = Buffer::new_unnamed(0, "this is\na multiline\nfile");
        prog.execute(&mut b, "test", &mut vec![]).unwrap();

        // '.*' will match the null string at the end of lines containing a newline as well
        assert_eq!(&b.txt.to_string(), "foofoo\nfoofoo\nfoo");
    }

    #[test]
    fn multiline_file_dot_plus_works() {
        let mut prog = Program::try_parse(", x/.+/ c/foo/").unwrap();
        let mut b = Buffer::new_unnamed(0, "this is\na multiline\nfile");
        prog.execute(&mut b, "test", &mut vec![]).unwrap();

        assert_eq!(&b.txt.to_string(), "foo\nfoo\nfoo");
    }

    // Running a program and then undoing everything should restore the original content
    #[test_case(", d"; "delete buffer")]
    #[test_case(", x/th/ d"; "delete each th")]
    #[test_case(", x/ / d"; "delete spaces")]
    #[test_case(", s/ //g"; "sub remove spaces")]
    #[test_case(", x/\\b\\w+\\b/ d"; "delete each word")]
    #[test_case(", x/. / d"; "delete things before a space")]
    #[test_case(", x/\\b\\w+\\b/ c/buffalo/"; "change each word")]
    #[test_case(", x/\\b\\w+\\b/ a/buffalo/"; "append to each word")]
    #[test_case(", x/\\b\\w+\\b/ i/buffalo/"; "insert before each word")]
    #[test]
    fn buffer_execute_undo_all_is_a_noop(s: &str) {
        let mut prog = Program::try_parse(s).unwrap();
        let initial_content = "this is a line\nand another\n- [ ] something to do\n";
        let mut b = Buffer::new_unnamed(0, initial_content);

        prog.execute(&mut b, "test", &mut vec![]).unwrap();
        // NOTE(review): relies on Undo returning None until there is nothing left to undo --
        // confirm against Buffer::handle_action
        while b.handle_action(Action::Undo, Source::Keyboard).is_none() {}
        let mut final_content = String::from_utf8(b.contents()).unwrap();
        final_content.pop(); // The newline that we append

        assert_eq!(&final_content, initial_content);
    }

    // regression test: gh#30
    #[test]
    fn regression_edit_landing_on_gap_end() {
        // This is line 1 of the test file
        let s = r#"6,32 x/.*\n/ y/\s+-- / x/(.+)/ c/"$1"/"#;
        let mut prog = Program::try_parse(s).unwrap();
        let mut b = Buffer::new_unnamed(
            0,
            include_str!("../../data/regression/edit-landing-on-gap-end.txt"),
        );
        prog.execute(&mut b, "test", &mut vec![]).unwrap();

        let expected = r#""w | write"             -- "save the current buffer to disk. (Blocked if the file has been modified on disk)""#;
        let final_content = String::from_utf8(b.contents()).unwrap();
        assert_eq!(final_content.lines().nth(29).unwrap(), expected);
    }
}