Skip to main content

sed_rs/
command.rs

1use crate::error::{Error, Result};
2
3// ---------------------------------------------------------------------------
4// Types
5// ---------------------------------------------------------------------------
6
/// A single line selector in a sed script.
///
/// Derives `PartialEq`/`Eq` so addresses can be compared directly (for
/// example with `assert_eq!`) instead of being destructured by hand.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Address {
    /// A specific line number (1-indexed)
    Line(usize),
    /// The last line of input (written `$` in a script)
    Last,
    /// Lines matching a regex pattern; holds the pattern text as written
    /// between the delimiters
    Regex(String),
    /// GNU extension: first~step (every step-th line starting at first)
    Step { first: usize, step: usize },
}
18
/// The address part of a command: which input lines the command applies to.
///
/// `negated` is set by the parser when a `!` follows the address (or the
/// second address of a range).
#[derive(Debug, Clone)]
pub enum AddressRange {
    /// No address — matches every line
    None,
    /// Single address, optionally negated
    Single { addr: Address, negated: bool },
    /// Range of two addresses (inclusive), optionally negated.
    /// Written `start,end` in a script.
    Range {
        start: Address,
        end: Address,
        negated: bool,
    },
}
32
/// Parsed form of an `s<delim>pattern<delim>replacement<delim>flags` command.
///
/// Derives `PartialEq`/`Eq` so parsed commands can be compared directly, and
/// `Default` (empty strings, all flags off) so callers can use struct-update
/// syntax: `SubstituteCmd { pattern, ..Default::default() }`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct SubstituteCmd {
    /// Regex text exactly as written between the first two delimiters.
    pub pattern: String,
    /// Replacement text as written between the second and third delimiters.
    pub replacement: String,
    /// Set by the `g` flag.
    pub global: bool,
    /// Set by the `p` flag.
    pub print: bool,
    /// Set by a numeric flag.
    pub nth: Option<usize>,
    /// Set by the `i` or `I` flag.
    pub case_insensitive: bool,
    /// Filename given after the `w` flag, if any.
    pub write_file: Option<String>,
}
43
/// A single sed command, without its address (see [`SedCommand`]).
///
/// The character quoted on each variant is the script character that the
/// parser in this file maps to that variant.
#[derive(Debug, Clone)]
pub enum Command {
    // -- Substitution & Transliteration --
    /// `s` — pattern, replacement and flags are held in [`SubstituteCmd`].
    Substitute(SubstituteCmd),
    /// `y` — `from` and `to` always have the same length (the parser rejects
    /// anything else).
    Transliterate { from: Vec<char>, to: Vec<char> },

    // -- Output --
    /// `p`
    Print,
    /// `P`
    PrintFirstLine,
    /// `=`
    PrintLineNumber,
    /// `l`
    List,

    // -- Deletion --
    /// `d`
    Delete,
    /// `D`
    DeleteFirstLine,

    // -- Input --
    /// `n`
    Next,
    /// `N`
    NextAppend,

    // -- Hold space --
    /// `h`
    HoldReplace,
    /// `H`
    HoldAppend,
    /// `g`
    GetReplace,
    /// `G`
    GetAppend,
    /// `x`
    Exchange,

    // -- Text insertion --
    /// `a` — carries the text argument.
    Append(String),
    /// `i` — carries the text argument.
    Insert(String),
    /// `c` — carries the text argument.
    Change(String),

    // -- Branching --
    /// `:` — carries the label name.
    Label(String),
    /// `b` — `None` when no label was given.
    Branch(Option<String>),
    /// `t` — `None` when no label was given.
    BranchIfSub(Option<String>),
    /// `T` — `None` when no label was given.
    BranchIfNotSub(Option<String>),

    // -- I/O --
    /// `r` — carries the filename. The parser currently maps `R` to this
    /// variant as well.
    ReadFile(String),
    /// `w` — carries the filename.
    WriteFile(String),
    /// `W` — carries the filename.
    WriteFirstLine(String),

    // -- Control --
    /// `q` — with an optional numeric argument (presumably the exit status;
    /// confirm against the executor).
    Quit(Option<i32>),
    /// `Q` — with an optional numeric argument (presumably the exit status;
    /// confirm against the executor).
    QuitNoprint(Option<i32>),
    /// `z`
    ClearPattern,
    /// Produced for a `#` comment that appears in command position (after an
    /// address).
    Noop,

    // -- Grouping --
    /// `{ ... }` — the commands between the braces.
    Block(Vec<SedCommand>),
}
96
/// One parsed script entry: a command together with the address that guards it.
#[derive(Debug, Clone)]
pub struct SedCommand {
    /// Which lines the command applies to (`AddressRange::None` when the
    /// script gave no address).
    pub address: AddressRange,
    /// The command itself.
    pub command: Command,
}
102
103// ---------------------------------------------------------------------------
104// Parser
105// ---------------------------------------------------------------------------
106
107pub fn parse(script: &str) -> Result<Vec<SedCommand>> {
108    let mut parser = Parser::new(script);
109    parser.parse_script()
110}
111
/// Character-level cursor over a sed script.
struct Parser {
    /// The script, decoded into `char`s so it can be indexed by position.
    chars: Vec<char>,
    /// Index into `chars` of the next character to be read.
    pos: usize,
}
116
117impl Parser {
118    fn new(input: &str) -> Self {
119        Self {
120            chars: input.chars().collect(),
121            pos: 0,
122        }
123    }
124
125    fn peek(&self) -> Option<char> {
126        self.chars.get(self.pos).copied()
127    }
128
129    fn advance(&mut self) -> Option<char> {
130        let c = self.chars.get(self.pos).copied();
131        if c.is_some() {
132            self.pos += 1;
133        }
134        c
135    }
136
137    fn is_at_end(&self) -> bool {
138        self.pos >= self.chars.len()
139    }
140
141    /// Consume the next char if it matches `c`, returning true if consumed.
142    fn consume_if(&mut self, c: char) -> bool {
143        if self.peek() == Some(c) {
144            self.advance();
145            true
146        } else {
147            false
148        }
149    }
150
151    /// Skip spaces and tabs (but NOT newlines).
152    fn skip_spaces(&mut self) {
153        while matches!(self.peek(), Some(' ' | '\t')) {
154            self.advance();
155        }
156    }
157
158    /// Skip whitespace including newlines, semicolons, and comments.
159    fn skip_blanks(&mut self) {
160        loop {
161            match self.peek() {
162                Some(' ' | '\t' | '\n' | '\r' | ';') => {
163                    self.advance();
164                }
165                Some('#') => self.skip_line(),
166                _ => break,
167            }
168        }
169    }
170
171    /// Skip to end of current line.
172    fn skip_line(&mut self) {
173        while let Some(c) = self.advance() {
174            if c == '\n' {
175                break;
176            }
177        }
178    }
179
180    // -- Top-level parsing --
181
182    fn parse_script(&mut self) -> Result<Vec<SedCommand>> {
183        let mut commands = Vec::new();
184        loop {
185            self.skip_blanks();
186            if self.is_at_end() {
187                break;
188            }
189            if let Some(cmd) = self.parse_one_command()? {
190                commands.push(cmd);
191            }
192        }
193        Ok(commands)
194    }
195
196    fn parse_one_command(&mut self) -> Result<Option<SedCommand>> {
197        self.skip_blanks();
198        if self.is_at_end() {
199            return Ok(None);
200        }
201
202        // Parse address range
203        let address = self.parse_address_range()?;
204        self.skip_spaces();
205
206        let Some(ch) = self.advance() else {
207            return Ok(None);
208        };
209
210        let command = match ch {
211            '{' => {
212                let block = self.parse_block()?;
213                Command::Block(block)
214            }
215            '}' => return Ok(None), // handled by parse_block
216            's' => self.parse_substitute()?,
217            'y' => self.parse_transliterate()?,
218            'd' => Command::Delete,
219            'D' => Command::DeleteFirstLine,
220            'p' => Command::Print,
221            'P' => Command::PrintFirstLine,
222            '=' => Command::PrintLineNumber,
223            'l' => Command::List,
224            'q' => Command::Quit(self.parse_optional_int()),
225            'Q' => Command::QuitNoprint(self.parse_optional_int()),
226            'h' => Command::HoldReplace,
227            'H' => Command::HoldAppend,
228            'g' => Command::GetReplace,
229            'G' => Command::GetAppend,
230            'x' => Command::Exchange,
231            'n' => Command::Next,
232            'N' => Command::NextAppend,
233            'z' => Command::ClearPattern,
234            'a' => Command::Append(self.parse_text_arg()),
235            'i' => Command::Insert(self.parse_text_arg()),
236            'c' => Command::Change(self.parse_text_arg()),
237            'r' => Command::ReadFile(self.parse_filename_arg()),
238            'w' => Command::WriteFile(self.parse_filename_arg()),
239            'W' => Command::WriteFirstLine(self.parse_filename_arg()),
240            'R' => Command::ReadFile(self.parse_filename_arg()),
241            'b' => Command::Branch(self.parse_label_arg()),
242            't' => Command::BranchIfSub(self.parse_label_arg()),
243            'T' => Command::BranchIfNotSub(self.parse_label_arg()),
244            ':' => {
245                let label = self.parse_label_arg().unwrap_or_default();
246                Command::Label(label)
247            }
248            '#' => {
249                self.skip_line();
250                Command::Noop
251            }
252            c if c.is_whitespace() => return self.parse_one_command(),
253            c => {
254                return Err(Error::Parse(format!("unknown command: '{c}'")));
255            }
256        };
257
258        Ok(Some(SedCommand { address, command }))
259    }
260
261    // -- Address parsing --
262
263    fn parse_address_range(&mut self) -> Result<AddressRange> {
264        let Some(addr1) = self.parse_address()? else {
265            // Check for negation without address
266            if self.consume_if('!') {
267                return Err(Error::Parse(
268                    "'!' without preceding address".into(),
269                ));
270            }
271            return Ok(AddressRange::None);
272        };
273
274        self.skip_spaces();
275
276        if self.consume_if(',') {
277            self.skip_spaces();
278            let addr2 = self.parse_address()?.ok_or_else(|| {
279                Error::Parse("expected address after ','".into())
280            })?;
281            self.skip_spaces();
282            let negated = self.consume_if('!');
283            Ok(AddressRange::Range {
284                start: addr1,
285                end: addr2,
286                negated,
287            })
288        } else {
289            let negated = self.consume_if('!');
290            Ok(AddressRange::Single {
291                addr: addr1,
292                negated,
293            })
294        }
295    }
296
297    fn parse_address(&mut self) -> Result<Option<Address>> {
298        match self.peek() {
299            Some(c) if c.is_ascii_digit() => {
300                let n = self.parse_number();
301                if self.consume_if('~') {
302                    let step = self.parse_number();
303                    Ok(Some(Address::Step { first: n, step }))
304                } else {
305                    Ok(Some(Address::Line(n)))
306                }
307            }
308            Some('$') => {
309                self.advance();
310                Ok(Some(Address::Last))
311            }
312            Some('/') => {
313                self.advance();
314                let pattern = self.parse_regex_delimited('/')?;
315                Ok(Some(Address::Regex(pattern)))
316            }
317            Some('\\') => {
318                self.advance();
319                let delim = self.advance().ok_or_else(|| {
320                    Error::Parse("expected delimiter after '\\'".into())
321                })?;
322                let pattern = self.parse_regex_delimited(delim)?;
323                Ok(Some(Address::Regex(pattern)))
324            }
325            _ => Ok(None),
326        }
327    }
328
329    fn parse_number(&mut self) -> usize {
330        let mut n: usize = 0;
331        while let Some(c) = self.peek() {
332            if c.is_ascii_digit() {
333                n = n
334                    .saturating_mul(10)
335                    .saturating_add((c as u8 - b'0') as usize);
336                self.advance();
337            } else {
338                break;
339            }
340        }
341        n
342    }
343
344    fn parse_optional_int(&mut self) -> Option<i32> {
345        self.skip_spaces();
346        if self.peek().is_some_and(|c| c.is_ascii_digit()) {
347            Some(self.parse_number() as i32)
348        } else {
349            None
350        }
351    }
352
353    // -- Delimited content parsing --
354
355    /// Parse content between matching delimiters, handling backslash escapes.
356    fn parse_regex_delimited(&mut self, delim: char) -> Result<String> {
357        let mut s = String::new();
358        loop {
359            match self.advance() {
360                None => {
361                    return Err(Error::Parse(format!(
362                        "unterminated regex (expected closing '{delim}')"
363                    )));
364                }
365                Some(c) if c == delim => return Ok(s),
366                Some('\\') => {
367                    if let Some(next) = self.advance() {
368                        if next == delim {
369                            // Escaped delimiter → literal delimiter in regex
370                            s.push('\\');
371                            s.push(next);
372                        } else {
373                            s.push('\\');
374                            s.push(next);
375                        }
376                    } else {
377                        s.push('\\');
378                    }
379                }
380                Some(c) => s.push(c),
381            }
382        }
383    }
384
385    /// Parse content between delimiters for s/// replacement strings.
386    /// Does NOT add extra backslash escaping (preserves sed replacement syntax).
387    fn parse_replacement_delimited(&mut self, delim: char) -> Result<String> {
388        let mut s = String::new();
389        loop {
390            match self.advance() {
391                None => {
392                    return Err(Error::Parse(format!(
393                        "unterminated replacement (expected closing '{delim}')"
394                    )));
395                }
396                Some(c) if c == delim => return Ok(s),
397                Some('\\') => {
398                    if let Some(next) = self.advance() {
399                        if next == delim {
400                            s.push(next);
401                        } else {
402                            s.push('\\');
403                            s.push(next);
404                        }
405                    } else {
406                        s.push('\\');
407                    }
408                }
409                Some(c) => s.push(c),
410            }
411        }
412    }
413
414    // -- Command-specific parsing --
415
416    fn parse_substitute(&mut self) -> Result<Command> {
417        let delim = self.advance().ok_or_else(|| {
418            Error::Parse("missing delimiter for s command".into())
419        })?;
420        let pattern = self.parse_regex_delimited(delim)?;
421        let replacement = self.parse_replacement_delimited(delim)?;
422
423        let mut global = false;
424        let mut print = false;
425        let mut nth: Option<usize> = None;
426        let mut case_insensitive = false;
427        let mut write_file = None;
428
429        loop {
430            match self.peek() {
431                Some('g') => {
432                    self.advance();
433                    global = true;
434                }
435                Some('p') => {
436                    self.advance();
437                    print = true;
438                }
439                Some('i' | 'I') => {
440                    self.advance();
441                    case_insensitive = true;
442                }
443                Some('w') => {
444                    self.advance();
445                    self.skip_spaces();
446                    write_file = Some(self.parse_filename_arg());
447                    break;
448                }
449                Some(c) if c.is_ascii_digit() => {
450                    nth = Some(self.parse_number());
451                }
452                _ => break,
453            }
454        }
455
456        Ok(Command::Substitute(SubstituteCmd {
457            pattern,
458            replacement,
459            global,
460            print,
461            nth,
462            case_insensitive,
463            write_file,
464        }))
465    }
466
467    fn parse_transliterate(&mut self) -> Result<Command> {
468        let delim = self.advance().ok_or_else(|| {
469            Error::Parse("missing delimiter for y command".into())
470        })?;
471        let from_str = self.parse_regex_delimited(delim)?;
472        let to_str = self.parse_regex_delimited(delim)?;
473
474        let from: Vec<char> = from_str.chars().collect();
475        let to: Vec<char> = to_str.chars().collect();
476
477        if from.len() != to.len() {
478            return Err(Error::Parse(format!(
479                "y command: 'from' and 'to' must be same length ({} vs {})",
480                from.len(),
481                to.len()
482            )));
483        }
484
485        Ok(Command::Transliterate { from, to })
486    }
487
488    /// Parse text argument for a/i/c commands.
489    ///
490    /// Handles:
491    ///   a text       (GNU extension: text on same line)
492    ///   a\ text      (text after backslash)
493    ///   a\           (text on next line, with backslash continuation)
494    fn parse_text_arg(&mut self) -> String {
495        // Skip optional backslash
496        if self.peek() == Some('\\') {
497            self.advance();
498        }
499
500        // Skip one space or newline after command char / backslash
501        match self.peek() {
502            Some('\n') => {
503                self.advance();
504            }
505            Some(' ' | '\t') => {
506                self.advance();
507            }
508            _ => {}
509        }
510
511        let mut text = String::new();
512        loop {
513            match self.peek() {
514                None => break,
515                Some('\n') => {
516                    // Check for backslash continuation
517                    if text.ends_with('\\') {
518                        text.pop();
519                        text.push('\n');
520                        self.advance();
521                    } else {
522                        break;
523                    }
524                }
525                Some(c) => {
526                    text.push(c);
527                    self.advance();
528                }
529            }
530        }
531
532        text
533    }
534
535    /// Parse a label name (for b, t, T, : commands).
536    fn parse_label_arg(&mut self) -> Option<String> {
537        self.skip_spaces();
538        let mut label = String::new();
539        while let Some(c) = self.peek() {
540            if c.is_alphanumeric() || c == '_' || c == '.' || c == '-' {
541                label.push(c);
542                self.advance();
543            } else {
544                break;
545            }
546        }
547        if label.is_empty() {
548            None
549        } else {
550            Some(label)
551        }
552    }
553
554    /// Parse a filename argument (for r, w, W commands).
555    fn parse_filename_arg(&mut self) -> String {
556        self.skip_spaces();
557        let mut filename = String::new();
558        while let Some(c) = self.peek() {
559            if c == '\n' || c == ';' {
560                break;
561            }
562            filename.push(c);
563            self.advance();
564        }
565        filename.trim_end().to_string()
566    }
567
568    /// Parse a { ... } block of commands.
569    fn parse_block(&mut self) -> Result<Vec<SedCommand>> {
570        let mut commands = Vec::new();
571        loop {
572            self.skip_blanks();
573            if self.is_at_end() {
574                return Err(Error::Parse(
575                    "unterminated block (missing '}')".into(),
576                ));
577            }
578            if self.peek() == Some('}') {
579                self.advance();
580                break;
581            }
582            if let Some(cmd) = self.parse_one_command()? {
583                commands.push(cmd);
584            }
585        }
586        Ok(commands)
587    }
588}
589
590// ---------------------------------------------------------------------------
591// Tests
592// ---------------------------------------------------------------------------
593
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `script`, check that it yields exactly one command, and return it.
    fn single(script: &str) -> SedCommand {
        let mut cmds = parse(script).unwrap();
        assert_eq!(cmds.len(), 1);
        cmds.swap_remove(0)
    }

    #[test]
    fn parse_simple_substitute() {
        let other = single("s/foo/bar/g").command;
        let Command::Substitute(s) = &other else {
            panic!("expected Substitute, got {other:?}")
        };
        assert_eq!(s.pattern, "foo");
        assert_eq!(s.replacement, "bar");
        assert!(s.global);
    }

    #[test]
    fn parse_substitute_custom_delim() {
        let other = single("s|foo|bar|").command;
        let Command::Substitute(s) = &other else {
            panic!("expected Substitute, got {other:?}")
        };
        assert_eq!(s.pattern, "foo");
        assert_eq!(s.replacement, "bar");
        assert!(!s.global);
    }

    #[test]
    fn parse_address_line() {
        let other = single("3d").address;
        assert!(
            matches!(
                other,
                AddressRange::Single {
                    addr: Address::Line(3),
                    negated: false,
                }
            ),
            "unexpected address: {other:?}"
        );
    }

    #[test]
    fn parse_address_range_lines() {
        let other = single("1,10d").address;
        assert!(
            matches!(
                other,
                AddressRange::Range {
                    start: Address::Line(1),
                    end: Address::Line(10),
                    negated: false,
                }
            ),
            "unexpected address: {other:?}"
        );
    }

    #[test]
    fn parse_address_regex() {
        let other = single("/^foo/d").address;
        let AddressRange::Single {
            addr: Address::Regex(re),
            negated: false,
        } = &other
        else {
            panic!("unexpected address: {other:?}")
        };
        assert_eq!(re, "^foo");
    }

    #[test]
    fn parse_negated() {
        let other = single("/foo/!d").address;
        assert!(
            matches!(
                other,
                AddressRange::Single {
                    addr: Address::Regex(_),
                    negated: true,
                }
            ),
            "unexpected address: {other:?}"
        );
    }

    #[test]
    fn parse_multiple_commands() {
        let cmds = parse("s/a/b/; s/c/d/").unwrap();
        assert_eq!(cmds.len(), 2);
    }

    #[test]
    fn parse_block() {
        let other = single("/foo/ { s/a/b/; s/c/d/ }").command;
        let Command::Block(block) = &other else {
            panic!("expected Block, got {other:?}")
        };
        assert_eq!(block.len(), 2);
    }

    #[test]
    fn parse_transliterate() {
        let other = single("y/abc/xyz/").command;
        let Command::Transliterate { from, to } = &other else {
            panic!("expected Transliterate, got {other:?}")
        };
        assert_eq!(from, &['a', 'b', 'c']);
        assert_eq!(to, &['x', 'y', 'z']);
    }

    #[test]
    fn parse_labels_and_branches() {
        let cmds = parse(":loop\ns/foo/bar/\nt loop").unwrap();
        assert_eq!(cmds.len(), 3);

        let other = &cmds[0].command;
        let Command::Label(l) = other else {
            panic!("expected Label, got {other:?}")
        };
        assert_eq!(l, "loop");

        let other = &cmds[2].command;
        let Command::BranchIfSub(Some(l)) = other else {
            panic!("expected BranchIfSub, got {other:?}")
        };
        assert_eq!(l, "loop");
    }

    #[test]
    fn parse_append_text() {
        let other = single("a hello world").command;
        let Command::Append(t) = &other else {
            panic!("expected Append, got {other:?}")
        };
        assert_eq!(t, "hello world");
    }

    #[test]
    fn parse_last_line_address() {
        let other = single("$d").address;
        assert!(
            matches!(
                other,
                AddressRange::Single {
                    addr: Address::Last,
                    ..
                }
            ),
            "expected Last address, got {other:?}"
        );
    }

    #[test]
    fn parse_step_address() {
        let other = single("0~2d").address;
        assert!(
            matches!(
                other,
                AddressRange::Single {
                    addr: Address::Step { first: 0, step: 2 },
                    ..
                }
            ),
            "expected Step address, got {other:?}"
        );
    }
}