1use crate::error::{Error, Result};
2
/// A single sed address, i.e. one selector for the lines a command applies to.
///
/// The variants mirror the address syntaxes recognised by the parser in this
/// file; how each address is matched against input is up to the executor.
#[derive(Clone, Debug)]
pub enum Address {
    /// A literal line number, written as a run of decimal digits (e.g. `3`).
    Line(usize),

    /// The `$` address.
    Last,

    /// A regular-expression address, written `/re/` or `\cREc` with a custom
    /// delimiter `c`. Holds the pattern text between the delimiters.
    Regex(String),

    /// A `first~step` address (e.g. `0~2`).
    Step {
        /// The number written before the `~`.
        first: usize,
        /// The number written after the `~` (0 when no digits follow).
        step: usize,
    },
}
18
19#[derive(Debug, Clone)]
20pub enum AddressRange {
21 None,
23 Single { addr: Address, negated: bool },
25 Range {
27 start: Address,
28 end: Address,
29 negated: bool,
30 },
31}
32
/// The parsed pieces of an `s<delim>pattern<delim>replacement<delim>flags` command.
#[derive(Clone, Debug)]
pub struct SubstituteCmd {
    /// Text between the first and second delimiter. Backslash escapes are
    /// kept as written.
    pub pattern: String,
    /// Text between the second and third delimiter. An escaped delimiter is
    /// stored without its backslash; other escapes are kept as written.
    pub replacement: String,
    /// Set by the `g` flag.
    pub global: bool,
    /// Set by the `p` flag.
    pub print: bool,
    /// Set by a numeric flag (e.g. the `3` in `s/a/b/3`).
    pub nth: Option<usize>,
    /// Set by the `i` or `I` flag.
    pub case_insensitive: bool,
    /// File name given after the `w` flag, if any.
    pub write_file: Option<String>,
}
43
44#[derive(Debug, Clone)]
45pub enum Command {
46 Substitute(SubstituteCmd),
48 Transliterate { from: Vec<char>, to: Vec<char> },
49
50 Print,
52 PrintFirstLine,
53 PrintLineNumber,
54 List,
55
56 Delete,
58 DeleteFirstLine,
59
60 Next,
62 NextAppend,
63
64 HoldReplace,
66 HoldAppend,
67 GetReplace,
68 GetAppend,
69 Exchange,
70
71 Append(String),
73 Insert(String),
74 Change(String),
75
76 Label(String),
78 Branch(Option<String>),
79 BranchIfSub(Option<String>),
80 BranchIfNotSub(Option<String>),
81
82 ReadFile(String),
84 WriteFile(String),
85 WriteFirstLine(String),
86
87 Quit(Option<i32>),
89 QuitNoprint(Option<i32>),
90 ClearPattern,
91 Noop,
92
93 Block(Vec<SedCommand>),
95}
96
97#[derive(Debug, Clone)]
98pub struct SedCommand {
99 pub address: AddressRange,
100 pub command: Command,
101}
102
103pub fn parse(script: &str) -> Result<Vec<SedCommand>> {
108 let mut parser = Parser::new(script);
109 parser.parse_script()
110}
111
/// Cursor-based parser state over a sed script.
struct Parser {
    /// The script, decoded into characters so the cursor can index it directly.
    chars: Vec<char>,
    /// Index into `chars` of the next character to be read.
    pos: usize,
}
116
117impl Parser {
118 fn new(input: &str) -> Self {
119 Self {
120 chars: input.chars().collect(),
121 pos: 0,
122 }
123 }
124
125 fn peek(&self) -> Option<char> {
126 self.chars.get(self.pos).copied()
127 }
128
129 fn advance(&mut self) -> Option<char> {
130 let c = self.chars.get(self.pos).copied();
131 if c.is_some() {
132 self.pos += 1;
133 }
134 c
135 }
136
137 fn is_at_end(&self) -> bool {
138 self.pos >= self.chars.len()
139 }
140
141 fn consume_if(&mut self, c: char) -> bool {
143 if self.peek() == Some(c) {
144 self.advance();
145 true
146 } else {
147 false
148 }
149 }
150
151 fn skip_spaces(&mut self) {
153 while matches!(self.peek(), Some(' ' | '\t')) {
154 self.advance();
155 }
156 }
157
158 fn skip_blanks(&mut self) {
160 loop {
161 match self.peek() {
162 Some(' ' | '\t' | '\n' | '\r' | ';') => {
163 self.advance();
164 }
165 Some('#') => self.skip_line(),
166 _ => break,
167 }
168 }
169 }
170
171 fn skip_line(&mut self) {
173 while let Some(c) = self.advance() {
174 if c == '\n' {
175 break;
176 }
177 }
178 }
179
180 fn parse_script(&mut self) -> Result<Vec<SedCommand>> {
183 let mut commands = Vec::new();
184 loop {
185 self.skip_blanks();
186 if self.is_at_end() {
187 break;
188 }
189 if let Some(cmd) = self.parse_one_command()? {
190 commands.push(cmd);
191 }
192 }
193 Ok(commands)
194 }
195
196 fn parse_one_command(&mut self) -> Result<Option<SedCommand>> {
197 self.skip_blanks();
198 if self.is_at_end() {
199 return Ok(None);
200 }
201
202 let address = self.parse_address_range()?;
204 self.skip_spaces();
205
206 let Some(ch) = self.advance() else {
207 return Ok(None);
208 };
209
210 let command = match ch {
211 '{' => {
212 let block = self.parse_block()?;
213 Command::Block(block)
214 }
215 '}' => return Ok(None), 's' => self.parse_substitute()?,
217 'y' => self.parse_transliterate()?,
218 'd' => Command::Delete,
219 'D' => Command::DeleteFirstLine,
220 'p' => Command::Print,
221 'P' => Command::PrintFirstLine,
222 '=' => Command::PrintLineNumber,
223 'l' => Command::List,
224 'q' => Command::Quit(self.parse_optional_int()),
225 'Q' => Command::QuitNoprint(self.parse_optional_int()),
226 'h' => Command::HoldReplace,
227 'H' => Command::HoldAppend,
228 'g' => Command::GetReplace,
229 'G' => Command::GetAppend,
230 'x' => Command::Exchange,
231 'n' => Command::Next,
232 'N' => Command::NextAppend,
233 'z' => Command::ClearPattern,
234 'a' => Command::Append(self.parse_text_arg()),
235 'i' => Command::Insert(self.parse_text_arg()),
236 'c' => Command::Change(self.parse_text_arg()),
237 'r' => Command::ReadFile(self.parse_filename_arg()),
238 'w' => Command::WriteFile(self.parse_filename_arg()),
239 'W' => Command::WriteFirstLine(self.parse_filename_arg()),
240 'R' => Command::ReadFile(self.parse_filename_arg()),
241 'b' => Command::Branch(self.parse_label_arg()),
242 't' => Command::BranchIfSub(self.parse_label_arg()),
243 'T' => Command::BranchIfNotSub(self.parse_label_arg()),
244 ':' => {
245 let label = self.parse_label_arg().unwrap_or_default();
246 Command::Label(label)
247 }
248 '#' => {
249 self.skip_line();
250 Command::Noop
251 }
252 c if c.is_whitespace() => return self.parse_one_command(),
253 c => {
254 return Err(Error::Parse(format!("unknown command: '{c}'")));
255 }
256 };
257
258 Ok(Some(SedCommand { address, command }))
259 }
260
261 fn parse_address_range(&mut self) -> Result<AddressRange> {
264 let Some(addr1) = self.parse_address()? else {
265 if self.consume_if('!') {
267 return Err(Error::Parse(
268 "'!' without preceding address".into(),
269 ));
270 }
271 return Ok(AddressRange::None);
272 };
273
274 self.skip_spaces();
275
276 if self.consume_if(',') {
277 self.skip_spaces();
278 let addr2 = self.parse_address()?.ok_or_else(|| {
279 Error::Parse("expected address after ','".into())
280 })?;
281 self.skip_spaces();
282 let negated = self.consume_if('!');
283 Ok(AddressRange::Range {
284 start: addr1,
285 end: addr2,
286 negated,
287 })
288 } else {
289 let negated = self.consume_if('!');
290 Ok(AddressRange::Single {
291 addr: addr1,
292 negated,
293 })
294 }
295 }
296
297 fn parse_address(&mut self) -> Result<Option<Address>> {
298 match self.peek() {
299 Some(c) if c.is_ascii_digit() => {
300 let n = self.parse_number();
301 if self.consume_if('~') {
302 let step = self.parse_number();
303 Ok(Some(Address::Step { first: n, step }))
304 } else {
305 Ok(Some(Address::Line(n)))
306 }
307 }
308 Some('$') => {
309 self.advance();
310 Ok(Some(Address::Last))
311 }
312 Some('/') => {
313 self.advance();
314 let pattern = self.parse_regex_delimited('/')?;
315 Ok(Some(Address::Regex(pattern)))
316 }
317 Some('\\') => {
318 self.advance();
319 let delim = self.advance().ok_or_else(|| {
320 Error::Parse("expected delimiter after '\\'".into())
321 })?;
322 let pattern = self.parse_regex_delimited(delim)?;
323 Ok(Some(Address::Regex(pattern)))
324 }
325 _ => Ok(None),
326 }
327 }
328
329 fn parse_number(&mut self) -> usize {
330 let mut n: usize = 0;
331 while let Some(c) = self.peek() {
332 if c.is_ascii_digit() {
333 n = n
334 .saturating_mul(10)
335 .saturating_add((c as u8 - b'0') as usize);
336 self.advance();
337 } else {
338 break;
339 }
340 }
341 n
342 }
343
344 fn parse_optional_int(&mut self) -> Option<i32> {
345 self.skip_spaces();
346 if self.peek().is_some_and(|c| c.is_ascii_digit()) {
347 Some(self.parse_number() as i32)
348 } else {
349 None
350 }
351 }
352
353 fn parse_regex_delimited(&mut self, delim: char) -> Result<String> {
357 let mut s = String::new();
358 loop {
359 match self.advance() {
360 None => {
361 return Err(Error::Parse(format!(
362 "unterminated regex (expected closing '{delim}')"
363 )));
364 }
365 Some(c) if c == delim => return Ok(s),
366 Some('\\') => {
367 if let Some(next) = self.advance() {
368 if next == delim {
369 s.push('\\');
371 s.push(next);
372 } else {
373 s.push('\\');
374 s.push(next);
375 }
376 } else {
377 s.push('\\');
378 }
379 }
380 Some(c) => s.push(c),
381 }
382 }
383 }
384
385 fn parse_replacement_delimited(&mut self, delim: char) -> Result<String> {
388 let mut s = String::new();
389 loop {
390 match self.advance() {
391 None => {
392 return Err(Error::Parse(format!(
393 "unterminated replacement (expected closing '{delim}')"
394 )));
395 }
396 Some(c) if c == delim => return Ok(s),
397 Some('\\') => {
398 if let Some(next) = self.advance() {
399 if next == delim {
400 s.push(next);
401 } else {
402 s.push('\\');
403 s.push(next);
404 }
405 } else {
406 s.push('\\');
407 }
408 }
409 Some(c) => s.push(c),
410 }
411 }
412 }
413
414 fn parse_substitute(&mut self) -> Result<Command> {
417 let delim = self.advance().ok_or_else(|| {
418 Error::Parse("missing delimiter for s command".into())
419 })?;
420 let pattern = self.parse_regex_delimited(delim)?;
421 let replacement = self.parse_replacement_delimited(delim)?;
422
423 let mut global = false;
424 let mut print = false;
425 let mut nth: Option<usize> = None;
426 let mut case_insensitive = false;
427 let mut write_file = None;
428
429 loop {
430 match self.peek() {
431 Some('g') => {
432 self.advance();
433 global = true;
434 }
435 Some('p') => {
436 self.advance();
437 print = true;
438 }
439 Some('i' | 'I') => {
440 self.advance();
441 case_insensitive = true;
442 }
443 Some('w') => {
444 self.advance();
445 self.skip_spaces();
446 write_file = Some(self.parse_filename_arg());
447 break;
448 }
449 Some(c) if c.is_ascii_digit() => {
450 nth = Some(self.parse_number());
451 }
452 _ => break,
453 }
454 }
455
456 Ok(Command::Substitute(SubstituteCmd {
457 pattern,
458 replacement,
459 global,
460 print,
461 nth,
462 case_insensitive,
463 write_file,
464 }))
465 }
466
467 fn parse_transliterate(&mut self) -> Result<Command> {
468 let delim = self.advance().ok_or_else(|| {
469 Error::Parse("missing delimiter for y command".into())
470 })?;
471 let from_str = self.parse_regex_delimited(delim)?;
472 let to_str = self.parse_regex_delimited(delim)?;
473
474 let from: Vec<char> = from_str.chars().collect();
475 let to: Vec<char> = to_str.chars().collect();
476
477 if from.len() != to.len() {
478 return Err(Error::Parse(format!(
479 "y command: 'from' and 'to' must be same length ({} vs {})",
480 from.len(),
481 to.len()
482 )));
483 }
484
485 Ok(Command::Transliterate { from, to })
486 }
487
488 fn parse_text_arg(&mut self) -> String {
495 if self.peek() == Some('\\') {
497 self.advance();
498 }
499
500 match self.peek() {
502 Some('\n') => {
503 self.advance();
504 }
505 Some(' ' | '\t') => {
506 self.advance();
507 }
508 _ => {}
509 }
510
511 let mut text = String::new();
512 loop {
513 match self.peek() {
514 None => break,
515 Some('\n') => {
516 if text.ends_with('\\') {
518 text.pop();
519 text.push('\n');
520 self.advance();
521 } else {
522 break;
523 }
524 }
525 Some(c) => {
526 text.push(c);
527 self.advance();
528 }
529 }
530 }
531
532 text
533 }
534
535 fn parse_label_arg(&mut self) -> Option<String> {
537 self.skip_spaces();
538 let mut label = String::new();
539 while let Some(c) = self.peek() {
540 if c.is_alphanumeric() || c == '_' || c == '.' || c == '-' {
541 label.push(c);
542 self.advance();
543 } else {
544 break;
545 }
546 }
547 if label.is_empty() {
548 None
549 } else {
550 Some(label)
551 }
552 }
553
554 fn parse_filename_arg(&mut self) -> String {
556 self.skip_spaces();
557 let mut filename = String::new();
558 while let Some(c) = self.peek() {
559 if c == '\n' || c == ';' {
560 break;
561 }
562 filename.push(c);
563 self.advance();
564 }
565 filename.trim_end().to_string()
566 }
567
568 fn parse_block(&mut self) -> Result<Vec<SedCommand>> {
570 let mut commands = Vec::new();
571 loop {
572 self.skip_blanks();
573 if self.is_at_end() {
574 return Err(Error::Parse(
575 "unterminated block (missing '}')".into(),
576 ));
577 }
578 if self.peek() == Some('}') {
579 self.advance();
580 break;
581 }
582 if let Some(cmd) = self.parse_one_command()? {
583 commands.push(cmd);
584 }
585 }
586 Ok(commands)
587 }
588}
589
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `script` and returns its only command, asserting there is exactly one.
    fn single(script: &str) -> SedCommand {
        let mut cmds = parse(script).unwrap();
        assert_eq!(cmds.len(), 1);
        cmds.remove(0)
    }

    #[test]
    fn parse_simple_substitute() {
        let cmd = single("s/foo/bar/g");
        let other = &cmd.command;
        let Command::Substitute(s) = other else {
            panic!("expected Substitute, got {other:?}");
        };
        assert_eq!(s.pattern, "foo");
        assert_eq!(s.replacement, "bar");
        assert!(s.global);
    }

    #[test]
    fn parse_substitute_custom_delim() {
        let cmd = single("s|foo|bar|");
        let other = &cmd.command;
        let Command::Substitute(s) = other else {
            panic!("expected Substitute, got {other:?}");
        };
        assert_eq!(s.pattern, "foo");
        assert_eq!(s.replacement, "bar");
        assert!(!s.global);
    }

    #[test]
    fn parse_address_line() {
        let cmd = single("3d");
        let other = &cmd.address;
        assert!(
            matches!(
                other,
                AddressRange::Single {
                    addr: Address::Line(3),
                    negated: false,
                }
            ),
            "unexpected address: {other:?}"
        );
    }

    #[test]
    fn parse_address_range_lines() {
        let cmd = single("1,10d");
        let other = &cmd.address;
        assert!(
            matches!(
                other,
                AddressRange::Range {
                    start: Address::Line(1),
                    end: Address::Line(10),
                    negated: false,
                }
            ),
            "unexpected address: {other:?}"
        );
    }

    #[test]
    fn parse_address_regex() {
        let cmd = single("/^foo/d");
        let other = &cmd.address;
        let AddressRange::Single {
            addr: Address::Regex(re),
            negated: false,
        } = other
        else {
            panic!("unexpected address: {other:?}");
        };
        assert_eq!(re, "^foo");
    }

    #[test]
    fn parse_negated() {
        let cmd = single("/foo/!d");
        let other = &cmd.address;
        assert!(
            matches!(
                other,
                AddressRange::Single {
                    addr: Address::Regex(_),
                    negated: true,
                }
            ),
            "unexpected address: {other:?}"
        );
    }

    #[test]
    fn parse_multiple_commands() {
        let cmds = parse("s/a/b/; s/c/d/").unwrap();
        assert_eq!(cmds.len(), 2);
    }

    #[test]
    fn parse_block() {
        let cmd = single("/foo/ { s/a/b/; s/c/d/ }");
        let other = &cmd.command;
        let Command::Block(block) = other else {
            panic!("expected Block, got {other:?}");
        };
        assert_eq!(block.len(), 2);
    }

    #[test]
    fn parse_transliterate() {
        let cmd = single("y/abc/xyz/");
        let other = &cmd.command;
        let Command::Transliterate { from, to } = other else {
            panic!("expected Transliterate, got {other:?}");
        };
        assert_eq!(from, &['a', 'b', 'c']);
        assert_eq!(to, &['x', 'y', 'z']);
    }

    #[test]
    fn parse_labels_and_branches() {
        let cmds = parse(":loop\ns/foo/bar/\nt loop").unwrap();
        assert_eq!(cmds.len(), 3);

        let other = &cmds[0].command;
        let Command::Label(label) = other else {
            panic!("expected Label, got {other:?}");
        };
        assert_eq!(label, "loop");

        let other = &cmds[2].command;
        let Command::BranchIfSub(Some(target)) = other else {
            panic!("expected BranchIfSub, got {other:?}");
        };
        assert_eq!(target, "loop");
    }

    #[test]
    fn parse_append_text() {
        let cmd = single("a hello world");
        let other = &cmd.command;
        let Command::Append(text) = other else {
            panic!("expected Append, got {other:?}");
        };
        assert_eq!(text, "hello world");
    }

    #[test]
    fn parse_last_line_address() {
        let cmd = single("$d");
        let other = &cmd.address;
        assert!(
            matches!(
                other,
                AddressRange::Single {
                    addr: Address::Last,
                    ..
                }
            ),
            "expected Last address, got {other:?}"
        );
    }

    #[test]
    fn parse_step_address() {
        let cmd = single("0~2d");
        let other = &cmd.address;
        assert!(
            matches!(
                other,
                AddressRange::Single {
                    addr: Address::Step { first: 0, step: 2 },
                    ..
                }
            ),
            "expected Step address, got {other:?}"
        );
    }
}