1use std::collections::HashMap;
11use std::fs;
12use std::io::{self, BufRead, Write};
13use std::path::Path;
14
15use regex::Regex;
16
17use crate::cli::Options;
18use crate::command::{Address, AddressRange, Command, SedCommand};
19use crate::error::{Error, Result};
20
21enum CompiledAddress {
26 Line(usize),
27 Last,
28 Regex(Regex),
29 Step { first: usize, step: usize },
30}
31
32struct CompiledAddressRange {
33 kind: CompiledAddressKind,
34 negated: bool,
35}
36
37enum CompiledAddressKind {
38 None,
39 Single(CompiledAddress),
40 Range(CompiledAddress, CompiledAddress),
41}
42
43impl CompiledAddressRange {
44 fn none() -> Self {
45 Self {
46 kind: CompiledAddressKind::None,
47 negated: false,
48 }
49 }
50}
51
52struct CompiledSubstitute {
53 pattern: Regex,
54 replacement: String,
55 global: bool,
56 print: bool,
57 nth: Option<usize>,
58 write_file: Option<String>,
59}
60
61enum CompiledCommand {
62 ScopeStart,
63 ScopeEnd,
64 Substitute(CompiledSubstitute),
65 Transliterate { from: Vec<char>, to: Vec<char> },
66 Print,
67 PrintFirstLine,
68 PrintLineNumber,
69 List,
70 Delete,
71 DeleteFirstLine,
72 Next,
73 NextAppend,
74 HoldReplace,
75 HoldAppend,
76 GetReplace,
77 GetAppend,
78 Exchange,
79 Append(String),
80 Insert(String),
81 Change(String),
82 #[allow(dead_code)]
83 Label(String),
84 Branch(Option<String>),
85 BranchIfSub(Option<String>),
86 BranchIfNotSub(Option<String>),
87 ReadFile(String),
88 WriteFile(String),
89 Quit(Option<i32>),
90 QuitNoprint(Option<i32>),
91 ClearPattern,
92 Noop,
93}
94
95struct CompiledSedCommand {
96 address: CompiledAddressRange,
97 command: CompiledCommand,
98}
99
/// Control-flow outcome reported by a command (or by a whole script pass).
enum Flow {
    /// Carry on with the next command; at script level, end the cycle
    /// normally.
    Continue,
    /// End the cycle now without auto-printing the pattern space.
    Restart,
    /// Run the script again on the current pattern space without reading
    /// new input.
    RestartScript,
    /// Jump to the named label, or to the end of the script when `None`.
    Branch(Option<String>),
    /// Stop processing after the usual end-of-cycle output; the value is
    /// the requested exit code.
    Quit(i32),
    /// Stop processing immediately without end-of-cycle output; the value
    /// is the requested exit code.
    QuitNoPrint(i32),
}
118
119pub struct Engine {
124 commands: Vec<CompiledSedCommand>,
125 labels: HashMap<String, usize>,
126 quiet: bool,
127 null_data: bool,
128}
129
/// Mutable execution state for one input stream.
struct State {
    /// 1-based number of the most recently read input line (0 before any).
    line_number: usize,
    /// Whether the most recently read line is the final one of the stream.
    is_last_line: bool,
    /// The text commands operate on.
    pattern_space: String,
    /// Auxiliary buffer used by the hold/get/exchange commands.
    hold_space: String,
    /// Text queued for output at the end of the current cycle.
    append_queue: Vec<String>,
    /// Set by a successful substitution; consulted by conditional branches.
    last_sub_success: bool,
    /// Per-command flag: is that command's two-address range currently open?
    range_active: Vec<bool>,
}

impl State {
    /// Creates an empty state with one closed range flag per command.
    fn new(num_commands: usize) -> Self {
        let range_active = vec![false; num_commands];
        Self {
            range_active,
            append_queue: Vec::new(),
            hold_space: String::new(),
            pattern_space: String::new(),
            last_sub_success: false,
            is_last_line: false,
            line_number: 0,
        }
    }
}
155
156impl Engine {
157 pub fn new(
158 parsed: Vec<SedCommand>,
159 options: &Options,
160 ) -> Result<Self> {
161 let (commands, labels) = compile_commands(parsed)?;
162 Ok(Self {
163 commands,
164 labels,
165 quiet: options.quiet,
166 null_data: options.null_data,
167 })
168 }
169
170 fn write_line<W: Write>(
173 &self,
174 writer: &mut W,
175 s: &str,
176 ) -> Result<()> {
177 write!(writer, "{}", s)?;
178 if self.null_data {
179 writer.write_all(b"\0")?;
180 } else {
181 writer.write_all(b"\n")?;
182 }
183 Ok(())
184 }
185
186 pub fn run(&self, files: &[std::path::PathBuf]) -> Result<()> {
188 let stdout = io::stdout();
189 let mut out = io::BufWriter::new(stdout.lock());
190
191 if files.is_empty() {
192 let stdin = io::stdin();
193 let reader = stdin.lock();
194 self.process_stream(reader, &mut out)?;
195 } else {
196 for path in files {
197 if !path.exists() {
198 eprintln!(
199 "sed: can't read {}: No such file or directory",
200 path.display()
201 );
202 continue;
203 }
204 let file = fs::File::open(path)?;
205 let reader = io::BufReader::new(file);
206 self.process_stream(reader, &mut out)?;
207 }
208 }
209
210 out.flush()?;
211 Ok(())
212 }
213
214 pub fn run_in_place(
216 &self,
217 files: &[std::path::PathBuf],
218 backup_suffix: &str,
219 ) -> Result<()> {
220 for path in files {
221 if !path.exists() {
222 eprintln!(
223 "sed: can't read {}: No such file or directory",
224 path.display()
225 );
226 continue;
227 }
228
229 let file = fs::File::open(path)?;
231 let reader = io::BufReader::new(file);
232 let mut output = Vec::new();
233 {
234 let mut cursor = io::Cursor::new(&mut output);
235 self.process_stream(reader, &mut cursor)?;
236 }
237
238 if !backup_suffix.is_empty() {
240 let backup_path = format!(
241 "{}{}",
242 path.display(),
243 backup_suffix
244 );
245 fs::copy(path, &backup_path)?;
246 }
247
248 write_atomic(path, &output)?;
250 }
251 Ok(())
252 }
253
254 pub fn process_stream<R: BufRead, W: Write>(
260 &self,
261 reader: R,
262 writer: &mut W,
263 ) -> Result<()> {
264 let mut line_reader = LineReader::new(reader, self.null_data);
265 let mut state = State::new(self.commands.len());
266
267 while let Some((line, is_last)) = line_reader.read_line()? {
268 state.line_number += 1;
269 state.is_last_line = is_last;
270 state.pattern_space = line;
271 state.append_queue.clear();
272
273 loop {
276 match self.execute_all(
277 &mut state,
278 &mut line_reader,
279 writer,
280 )? {
281 Flow::Restart => {
282 self.flush_appends(&state, writer)?;
285 break;
286 }
287 Flow::RestartScript => {
288 state.append_queue.clear();
291 continue;
292 }
293 Flow::Quit(code) => {
294 if !self.quiet {
296 self.write_line(
297 writer,
298 &state.pattern_space,
299 )?;
300 }
301 self.flush_appends(&state, writer)?;
302 if code != 0 {
303 std::process::exit(code);
304 }
305 return Ok(());
306 }
307 Flow::QuitNoPrint(code) => {
308 if code != 0 {
309 std::process::exit(code);
310 }
311 return Ok(());
312 }
313 Flow::Continue | Flow::Branch(_) => {
314 if !self.quiet {
315 self.write_line(
316 writer,
317 &state.pattern_space,
318 )?;
319 }
320 self.flush_appends(&state, writer)?;
321 break;
322 }
323 }
324 }
325 }
326
327 Ok(())
328 }
329
330 fn execute_all<R: BufRead, W: Write>(
331 &self,
332 state: &mut State,
333 line_reader: &mut LineReader<R>,
334 writer: &mut W,
335 ) -> Result<Flow> {
336 let mut pc: usize = 0;
337 let mut scope_depth: usize = 0;
338 let mut skip_depth: Option<usize> = None;
339
340 state.last_sub_success = false;
342
343 while pc < self.commands.len() {
344 let cmd = &self.commands[pc];
345
346 if let Some(sd) = skip_depth {
348 match &cmd.command {
349 CompiledCommand::ScopeStart => {
350 scope_depth += 1;
351 }
352 CompiledCommand::ScopeEnd => {
353 if scope_depth == sd {
354 skip_depth = None;
355 }
356 scope_depth = scope_depth.saturating_sub(1);
357 }
358 _ => {}
359 }
360 pc += 1;
361 continue;
362 }
363
364 match &cmd.command {
365 CompiledCommand::ScopeStart => {
366 scope_depth += 1;
367 if !self.address_matches(
368 &cmd.address,
369 state,
370 pc,
371 ) {
372 skip_depth = Some(scope_depth);
373 }
374 pc += 1;
375 continue;
376 }
377 CompiledCommand::ScopeEnd => {
378 scope_depth = scope_depth.saturating_sub(1);
379 pc += 1;
380 continue;
381 }
382 _ => {}
383 }
384
385 if !self.address_matches(&cmd.address, state, pc) {
387 pc += 1;
388 continue;
389 }
390
391 let flow = self.execute_one(
393 &cmd.command,
394 state,
395 line_reader,
396 writer,
397 )?;
398
399 match flow {
400 Flow::Continue => {
401 pc += 1;
402 }
403 Flow::Restart => return Ok(Flow::Restart),
404 Flow::RestartScript => {
405 return Ok(Flow::RestartScript)
406 }
407 Flow::Quit(c) => return Ok(Flow::Quit(c)),
408 Flow::QuitNoPrint(c) => return Ok(Flow::QuitNoPrint(c)),
409 Flow::Branch(ref label) => {
410 if let Some(name) = label {
411 if let Some(&target) = self.labels.get(name) {
412 pc = target;
413 continue;
414 }
415 }
416 return Ok(Flow::Continue);
418 }
419 }
420 }
421
422 Ok(Flow::Continue)
423 }
424
425 fn execute_one<R: BufRead, W: Write>(
426 &self,
427 command: &CompiledCommand,
428 state: &mut State,
429 line_reader: &mut LineReader<R>,
430 writer: &mut W,
431 ) -> Result<Flow> {
432 match command {
433 CompiledCommand::Substitute(sub) => {
434 let (result, matched) = apply_substitution(
435 &sub.pattern,
436 &state.pattern_space,
437 &sub.replacement,
438 sub.global,
439 sub.nth,
440 );
441 if matched {
442 state.pattern_space = result;
443 state.last_sub_success = true;
444 if sub.print {
445 self.write_line(
446 writer,
447 &state.pattern_space,
448 )?;
449 }
450 if let Some(ref path) = sub.write_file {
451 let mut f = fs::OpenOptions::new()
452 .create(true)
453 .append(true)
454 .open(path)?;
455 writeln!(f, "{}", state.pattern_space)?;
456 }
457 }
458 }
459
460 CompiledCommand::Transliterate { from, to } => {
461 let mut new = String::with_capacity(
462 state.pattern_space.len(),
463 );
464 for c in state.pattern_space.chars() {
465 if let Some(pos) = from.iter().position(|&f| f == c) {
466 new.push(to[pos]);
467 } else {
468 new.push(c);
469 }
470 }
471 state.pattern_space = new;
472 }
473
474 CompiledCommand::Delete => return Ok(Flow::Restart),
475
476 CompiledCommand::DeleteFirstLine => {
477 if let Some(pos) = state.pattern_space.find('\n') {
478 state.pattern_space =
479 state.pattern_space[pos + 1..].to_string();
480 return Ok(Flow::RestartScript);
483 } else {
484 return Ok(Flow::Restart);
486 }
487 }
488
489 CompiledCommand::Print => {
490 self.write_line(writer, &state.pattern_space)?;
491 }
492
493 CompiledCommand::PrintFirstLine => {
494 if let Some(pos) = state.pattern_space.find('\n') {
495 self.write_line(
496 writer,
497 &state.pattern_space[..pos],
498 )?;
499 } else {
500 self.write_line(writer, &state.pattern_space)?;
501 }
502 }
503
504 CompiledCommand::PrintLineNumber => {
505 self.write_line(
506 writer,
507 &state.line_number.to_string(),
508 )?;
509 }
510
511 CompiledCommand::List => {
512 let listed = list_escape(&state.pattern_space);
514 self.write_line(writer, &format!("{}$", listed))?;
515 }
516
517 CompiledCommand::Next => {
518 if !self.quiet {
520 self.write_line(writer, &state.pattern_space)?;
521 }
522 self.flush_appends(state, writer)?;
523
524 if let Some((line, is_last)) = line_reader.read_line()? {
526 state.line_number += 1;
527 state.is_last_line = is_last;
528 state.pattern_space = line;
529 } else {
530 return Ok(Flow::QuitNoPrint(0));
533 }
534 }
535
536 CompiledCommand::NextAppend => {
537 if let Some((line, is_last)) = line_reader.read_line()? {
538 state.line_number += 1;
539 state.is_last_line = is_last;
540 state.pattern_space.push('\n');
541 state.pattern_space.push_str(&line);
542 } else {
543 if !self.quiet {
545 self.write_line(
546 writer,
547 &state.pattern_space,
548 )?;
549 }
550 return Ok(Flow::QuitNoPrint(0));
551 }
552 }
553
554 CompiledCommand::HoldReplace => {
555 state.hold_space = state.pattern_space.clone();
556 }
557 CompiledCommand::HoldAppend => {
558 state.hold_space.push('\n');
559 state.hold_space.push_str(&state.pattern_space);
560 }
561 CompiledCommand::GetReplace => {
562 state.pattern_space = state.hold_space.clone();
563 }
564 CompiledCommand::GetAppend => {
565 state.pattern_space.push('\n');
566 let hold = state.hold_space.clone();
567 state.pattern_space.push_str(&hold);
568 }
569 CompiledCommand::Exchange => {
570 std::mem::swap(
571 &mut state.pattern_space,
572 &mut state.hold_space,
573 );
574 }
575
576 CompiledCommand::Append(text) => {
577 state.append_queue.push(text.clone());
578 }
579 CompiledCommand::Insert(text) => {
580 self.write_line(writer, text)?;
581 }
582 CompiledCommand::Change(text) => {
583 self.write_line(writer, text)?;
586 return Ok(Flow::Restart);
587 }
588
589 CompiledCommand::Branch(label) => {
590 return Ok(Flow::Branch(label.clone()));
591 }
592 CompiledCommand::BranchIfSub(label) => {
593 if state.last_sub_success {
594 state.last_sub_success = false;
595 return Ok(Flow::Branch(label.clone()));
596 }
597 }
598 CompiledCommand::BranchIfNotSub(label) => {
599 if !state.last_sub_success {
600 state.last_sub_success = false;
601 return Ok(Flow::Branch(label.clone()));
602 }
603 }
604
605 CompiledCommand::ReadFile(path) => {
606 if let Ok(content) = fs::read_to_string(path) {
607 state.append_queue.push(content.trim_end().to_string());
608 }
609 }
610 CompiledCommand::WriteFile(path) => {
611 let mut f = fs::OpenOptions::new()
612 .create(true)
613 .append(true)
614 .open(path)?;
615 writeln!(f, "{}", state.pattern_space)?;
616 }
617
618 CompiledCommand::Quit(code) => {
619 return Ok(Flow::Quit(code.unwrap_or(0)));
620 }
621 CompiledCommand::QuitNoprint(code) => {
622 return Ok(Flow::QuitNoPrint(code.unwrap_or(0)));
623 }
624 CompiledCommand::ClearPattern => {
625 state.pattern_space.clear();
626 }
627
628 CompiledCommand::Label(_)
629 | CompiledCommand::Noop
630 | CompiledCommand::ScopeStart
631 | CompiledCommand::ScopeEnd => {}
632 }
633
634 Ok(Flow::Continue)
635 }
636
637 fn address_matches(
638 &self,
639 addr: &CompiledAddressRange,
640 state: &mut State,
641 cmd_index: usize,
642 ) -> bool {
643 let raw = match &addr.kind {
644 CompiledAddressKind::None => true,
645 CompiledAddressKind::Single(a) => {
646 addr_matches_line(a, state)
647 }
648 CompiledAddressKind::Range(start, end) => {
649 let active = state
650 .range_active
651 .get(cmd_index)
652 .copied()
653 .unwrap_or(false);
654
655 if active {
656 if addr_matches_line(end, state) {
658 if let Some(v) =
660 state.range_active.get_mut(cmd_index)
661 {
662 *v = false;
663 }
664 }
665 true
666 } else if addr_matches_line(start, state) {
667 if let Some(v) =
669 state.range_active.get_mut(cmd_index)
670 {
671 *v = true;
672 }
673 if is_line_addr_at_or_before(end, state) {
679 if let Some(v) =
680 state.range_active.get_mut(cmd_index)
681 {
682 *v = false;
683 }
684 }
685 true
686 } else {
687 false
688 }
689 }
690 };
691
692 if addr.negated { !raw } else { raw }
693 }
694
695 fn flush_appends<W: Write>(
696 &self,
697 state: &State,
698 writer: &mut W,
699 ) -> Result<()> {
700 for text in &state.append_queue {
701 self.write_line(writer, text)?;
702 }
703 Ok(())
704 }
705}
706
707fn is_line_addr_at_or_before(
714 addr: &CompiledAddress,
715 state: &State,
716) -> bool {
717 match addr {
718 CompiledAddress::Line(n) => *n <= state.line_number,
719 _ => false,
720 }
721}
722
723fn addr_matches_line(addr: &CompiledAddress, state: &State) -> bool {
724 match addr {
725 CompiledAddress::Line(n) => state.line_number == *n,
726 CompiledAddress::Last => state.is_last_line,
727 CompiledAddress::Regex(re) => re.is_match(&state.pattern_space),
728 CompiledAddress::Step { first, step } => {
729 if *step == 0 {
730 state.line_number == *first
731 } else if *first == 0 {
732 state.line_number % step == 0
733 } else {
734 state.line_number >= *first
735 && (state.line_number - first) % step == 0
736 }
737 }
738 }
739}
740
741fn apply_substitution(
748 regex: &Regex,
749 text: &str,
750 replacement: &str,
751 global: bool,
752 nth: Option<usize>,
753) -> (String, bool) {
754 let mut result = String::with_capacity(text.len());
755 let mut last_end = 0;
756 let mut match_count: usize = 0;
757 let mut made_substitution = false;
758
759 for captures in regex.captures_iter(text) {
760 let whole = captures.get(0).unwrap();
761 match_count += 1;
762
763 let should_replace = match nth {
764 Some(n) => match_count == n,
765 None => global || match_count == 1,
766 };
767
768 if should_replace {
769 result.push_str(&text[last_end..whole.start()]);
770 apply_sed_replacement(&captures, replacement, &mut result);
771 last_end = whole.end();
772 made_substitution = true;
773 if !global {
774 break;
775 }
776 } else if !global && nth.is_none() {
777 break;
778 }
779 }
780
781 result.push_str(&text[last_end..]);
782 (result, made_substitution)
783}
784
785fn apply_sed_replacement(
795 captures: ®ex::Captures,
796 replacement: &str,
797 output: &mut String,
798) {
799 let mut chars = replacement.chars().peekable();
800 while let Some(c) = chars.next() {
801 match c {
802 '\\' => {
803 if let Some(&next) = chars.peek() {
804 match next {
805 '0'..='9' => {
806 chars.next();
807 let group = (next as u8 - b'0') as usize;
808 if let Some(m) = captures.get(group) {
809 output.push_str(m.as_str());
810 }
811 }
812 'n' => {
813 chars.next();
814 output.push('\n');
815 }
816 't' => {
817 chars.next();
818 output.push('\t');
819 }
820 '\\' => {
821 chars.next();
822 output.push('\\');
823 }
824 '&' => {
825 chars.next();
826 output.push('&');
827 }
828 _ => {
829 output.push('\\');
831 }
832 }
833 } else {
834 output.push('\\');
835 }
836 }
837 '&' => {
838 if let Some(m) = captures.get(0) {
839 output.push_str(m.as_str());
840 }
841 }
842 _ => output.push(c),
843 }
844 }
845}
846
/// Renders `s` in the unambiguous form used by the `l` command.
///
/// Backslash and the control characters that have a conventional mnemonic
/// are written as two-character escapes (`\\`, `\a`, `\b`, `\f`, `\n`,
/// `\r`, `\t`, `\v`, plus `\e` for ESC). Every other control character is
/// written as one three-digit octal escape per byte of its UTF-8 encoding,
/// as POSIX specifies for `l`. All remaining characters are copied as-is.
fn list_escape(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '\\' => out.push_str("\\\\"),
            '\n' => out.push_str("\\n"),
            '\t' => out.push_str("\\t"),
            '\r' => out.push_str("\\r"),
            '\x07' => out.push_str("\\a"),
            '\x08' => out.push_str("\\b"),
            '\x0B' => out.push_str("\\v"),
            '\x0C' => out.push_str("\\f"),
            '\x1B' => out.push_str("\\e"),
            c if c.is_control() => {
                let mut utf8 = [0u8; 4];
                for byte in c.encode_utf8(&mut utf8).bytes() {
                    out.push_str(&format!("\\{:03o}", byte));
                }
            }
            c => out.push(c),
        }
    }
    out
}
871
/// Record reader with one record of look-ahead, so callers can be told
/// whether the record they just received is the last one.
struct LineReader<R: BufRead> {
    /// Underlying input.
    reader: R,
    /// Scratch buffer reused for each read.
    buf: String,
    /// The record that the next `read_line` call will return, if any.
    pending: Option<String>,
    /// Set once the look-ahead found no further record.
    exhausted: bool,
    /// Records end at NUL bytes instead of newlines.
    null_data: bool,
}
883
884impl<R: BufRead> LineReader<R> {
885 fn new(mut reader: R, null_data: bool) -> Self {
886 let mut buf = String::new();
887 let pending = if null_data {
888 read_until_null(&mut reader, &mut buf)
889 } else {
890 match reader.read_line(&mut buf) {
891 Ok(0) => None,
892 Ok(_) => Some(chomp(&buf)),
893 Err(_) => None,
894 }
895 };
896
897 let exhausted = pending.is_none();
898 Self {
899 reader,
900 buf: String::new(),
901 pending,
902 exhausted,
903 null_data,
904 }
905 }
906
907 fn read_line(&mut self) -> Result<Option<(String, bool)>> {
909 let current = match self.pending.take() {
910 Some(line) => line,
911 None => return Ok(None),
912 };
913
914 self.buf.clear();
916 let next = if self.null_data {
917 read_until_null(&mut self.reader, &mut self.buf)
918 } else {
919 match self.reader.read_line(&mut self.buf) {
920 Ok(0) => None,
921 Ok(_) => Some(chomp(&self.buf)),
922 Err(e) => return Err(Error::Io(e)),
923 }
924 };
925
926 let is_last = next.is_none();
927 self.pending = next;
928 self.exhausted = is_last;
929
930 Ok(Some((current, is_last)))
931 }
932}
933
/// Returns `s` without its trailing line terminator: one `\n`, plus a `\r`
/// directly before it if present. A `\r` that is not followed by `\n` is
/// kept.
fn chomp(s: &str) -> String {
    let trimmed = match s.strip_suffix('\n') {
        Some(without_lf) => without_lf.strip_suffix('\r').unwrap_or(without_lf),
        None => s,
    };
    trimmed.to_owned()
}
945
/// Reads one NUL-terminated record from `reader`.
///
/// `buf` is cleared but not otherwise used. Returns `None` at end of input
/// or on a read error; otherwise the record without its trailing NUL, with
/// invalid UTF-8 replaced lossily.
fn read_until_null<R: BufRead>(
    reader: &mut R,
    buf: &mut String,
) -> Option<String> {
    buf.clear();

    let mut record = Vec::new();
    let bytes_read = reader.read_until(b'\0', &mut record).ok()?;
    if bytes_read == 0 {
        return None;
    }

    if record.ends_with(&[b'\0']) {
        record.pop();
    }
    Some(String::from_utf8_lossy(&record).into_owned())
}
965
966fn write_atomic(path: &Path, data: &[u8]) -> Result<()> {
971 let path = fs::canonicalize(path)?;
972 let parent = path
973 .parent()
974 .ok_or_else(|| Error::InvalidPath(path.to_path_buf()))?;
975
976 let temp = tempfile::NamedTempFile::new_in(parent)?;
977 let file = temp.as_file();
978
979 if let Ok(metadata) = fs::metadata(&path) {
981 file.set_permissions(metadata.permissions()).ok();
982
983 #[cfg(unix)]
984 {
985 use std::os::unix::fs::{MetadataExt, fchown};
986 let _ =
987 fchown(file, Some(metadata.uid()), Some(metadata.gid()));
988 }
989 }
990
991 let mut writer = io::BufWriter::new(file);
992 writer.write_all(data)?;
993 writer.flush()?;
994 drop(writer);
995
996 temp.persist(&path)?;
997 Ok(())
998}
999
1000fn compile_commands(
1005 parsed: Vec<SedCommand>,
1006) -> Result<(Vec<CompiledSedCommand>, HashMap<String, usize>)> {
1007 let mut compiled = Vec::new();
1008 let mut labels = HashMap::new();
1009 flatten_and_compile(parsed, &mut compiled, &mut labels)?;
1010 Ok((compiled, labels))
1011}
1012
1013fn flatten_and_compile(
1014 commands: Vec<SedCommand>,
1015 compiled: &mut Vec<CompiledSedCommand>,
1016 labels: &mut HashMap<String, usize>,
1017) -> Result<()> {
1018 for cmd in commands {
1019 match cmd.command {
1020 Command::Block(block) => {
1021 let addr = compile_address_range(cmd.address)?;
1022 compiled.push(CompiledSedCommand {
1023 address: addr,
1024 command: CompiledCommand::ScopeStart,
1025 });
1026 flatten_and_compile(block, compiled, labels)?;
1027 compiled.push(CompiledSedCommand {
1028 address: CompiledAddressRange::none(),
1029 command: CompiledCommand::ScopeEnd,
1030 });
1031 }
1032 Command::Label(ref name) => {
1033 labels.insert(name.clone(), compiled.len());
1034 compiled.push(CompiledSedCommand {
1035 address: CompiledAddressRange::none(),
1036 command: CompiledCommand::Label(name.clone()),
1037 });
1038 }
1039 other => {
1040 let addr = compile_address_range(cmd.address)?;
1041 let cc = compile_single_command(other)?;
1042 compiled.push(CompiledSedCommand {
1043 address: addr,
1044 command: cc,
1045 });
1046 }
1047 }
1048 }
1049 Ok(())
1050}
1051
1052fn compile_address_range(
1053 range: AddressRange,
1054) -> Result<CompiledAddressRange> {
1055 match range {
1056 AddressRange::None => Ok(CompiledAddressRange::none()),
1057 AddressRange::Single { addr, negated } => {
1058 Ok(CompiledAddressRange {
1059 kind: CompiledAddressKind::Single(
1060 compile_address(addr)?,
1061 ),
1062 negated,
1063 })
1064 }
1065 AddressRange::Range {
1066 start,
1067 end,
1068 negated,
1069 } => Ok(CompiledAddressRange {
1070 kind: CompiledAddressKind::Range(
1071 compile_address(start)?,
1072 compile_address(end)?,
1073 ),
1074 negated,
1075 }),
1076 }
1077}
1078
1079fn compile_address(addr: Address) -> Result<CompiledAddress> {
1080 match addr {
1081 Address::Line(n) => Ok(CompiledAddress::Line(n)),
1082 Address::Last => Ok(CompiledAddress::Last),
1083 Address::Regex(pattern) => {
1084 let re = regex::RegexBuilder::new(&pattern)
1085 .multi_line(true)
1086 .build()?;
1087 Ok(CompiledAddress::Regex(re))
1088 }
1089 Address::Step { first, step } => {
1090 Ok(CompiledAddress::Step { first, step })
1091 }
1092 }
1093}
1094
1095fn compile_single_command(cmd: Command) -> Result<CompiledCommand> {
1096 match cmd {
1097 Command::Substitute(sub) => {
1098 let re = regex::RegexBuilder::new(&sub.pattern)
1099 .case_insensitive(sub.case_insensitive)
1100 .multi_line(true)
1101 .build()?;
1102 Ok(CompiledCommand::Substitute(CompiledSubstitute {
1103 pattern: re,
1104 replacement: sub.replacement,
1105 global: sub.global,
1106 print: sub.print,
1107 nth: sub.nth,
1108 write_file: sub.write_file,
1109 }))
1110 }
1111 Command::Transliterate { from, to } => {
1112 Ok(CompiledCommand::Transliterate { from, to })
1113 }
1114 Command::Print => Ok(CompiledCommand::Print),
1115 Command::PrintFirstLine => Ok(CompiledCommand::PrintFirstLine),
1116 Command::PrintLineNumber => Ok(CompiledCommand::PrintLineNumber),
1117 Command::List => Ok(CompiledCommand::List),
1118 Command::Delete => Ok(CompiledCommand::Delete),
1119 Command::DeleteFirstLine => {
1120 Ok(CompiledCommand::DeleteFirstLine)
1121 }
1122 Command::Next => Ok(CompiledCommand::Next),
1123 Command::NextAppend => Ok(CompiledCommand::NextAppend),
1124 Command::HoldReplace => Ok(CompiledCommand::HoldReplace),
1125 Command::HoldAppend => Ok(CompiledCommand::HoldAppend),
1126 Command::GetReplace => Ok(CompiledCommand::GetReplace),
1127 Command::GetAppend => Ok(CompiledCommand::GetAppend),
1128 Command::Exchange => Ok(CompiledCommand::Exchange),
1129 Command::Append(t) => Ok(CompiledCommand::Append(t)),
1130 Command::Insert(t) => Ok(CompiledCommand::Insert(t)),
1131 Command::Change(t) => Ok(CompiledCommand::Change(t)),
1132 Command::Branch(l) => Ok(CompiledCommand::Branch(l)),
1133 Command::BranchIfSub(l) => Ok(CompiledCommand::BranchIfSub(l)),
1134 Command::BranchIfNotSub(l) => {
1135 Ok(CompiledCommand::BranchIfNotSub(l))
1136 }
1137 Command::ReadFile(f) => Ok(CompiledCommand::ReadFile(f)),
1138 Command::WriteFile(f) => Ok(CompiledCommand::WriteFile(f)),
1139 Command::WriteFirstLine(f) => {
1140 Ok(CompiledCommand::WriteFile(f))
1141 }
1142 Command::Quit(c) => Ok(CompiledCommand::Quit(c)),
1143 Command::QuitNoprint(c) => Ok(CompiledCommand::QuitNoprint(c)),
1144 Command::ClearPattern => Ok(CompiledCommand::ClearPattern),
1145 Command::Noop => Ok(CompiledCommand::Noop),
1146 Command::Label(l) => Ok(CompiledCommand::Label(l)),
1147 Command::Block(_) => {
1148 unreachable!("blocks should be flattened before this point")
1149 }
1150 }
1151}
1152
1153#[cfg(test)]
1158mod tests {
1159 use super::*;
1160 use crate::command;
1161
1162 fn run_sed(script: &str, input: &str) -> String {
1163 run_sed_opts(script, input, false)
1164 }
1165
1166 fn run_sed_opts(script: &str, input: &str, quiet: bool) -> String {
1167 let parsed = command::parse(script).unwrap();
1168 let options = Options {
1169 quiet,
1170 expressions: vec![],
1171 script_files: vec![],
1172 in_place: None,
1173 extended_regexp: false,
1174 separate: false,
1175 null_data: false,
1176 args: vec![],
1177 };
1178 let engine = Engine::new(parsed, &options).unwrap();
1179 let reader = io::Cursor::new(input.as_bytes());
1180 let mut output = Vec::new();
1181 engine
1182 .process_stream(io::BufReader::new(reader), &mut output)
1183 .unwrap();
1184 String::from_utf8(output).unwrap()
1185 }
1186
1187 #[test]
1188 fn substitute_basic() {
1189 assert_eq!(run_sed("s/foo/bar/", "foo\n"), "bar\n");
1190 }
1191
1192 #[test]
1193 fn substitute_global() {
1194 assert_eq!(
1195 run_sed("s/o/0/g", "foo boo\n"),
1196 "f00 b00\n"
1197 );
1198 }
1199
1200 #[test]
1201 fn substitute_first_only() {
1202 assert_eq!(run_sed("s/o/0/", "foo\n"), "f0o\n");
1203 }
1204
1205 #[test]
1206 fn substitute_nth() {
1207 assert_eq!(run_sed("s/o/0/2", "foo boo\n"), "fo0 boo\n");
1208 }
1209
1210 #[test]
1211 fn substitute_ampersand() {
1212 assert_eq!(
1213 run_sed("s/foo/[&]/", "foo\n"),
1214 "[foo]\n"
1215 );
1216 }
1217
1218 #[test]
1219 fn substitute_backreference() {
1220 assert_eq!(
1221 run_sed("s/(f)(o+)/\\2\\1/", "foo\n"),
1222 "oof\n"
1223 );
1224 }
1225
1226 #[test]
1227 fn delete_command() {
1228 assert_eq!(run_sed("2d", "a\nb\nc\n"), "a\nc\n");
1229 }
1230
1231 #[test]
1232 fn print_command_quiet() {
1233 assert_eq!(
1234 run_sed_opts("2p", "a\nb\nc\n", true),
1235 "b\n"
1236 );
1237 }
1238
1239 #[test]
1240 fn address_regex() {
1241 assert_eq!(
1242 run_sed("/^b/d", "apple\nbanana\ncherry\n"),
1243 "apple\ncherry\n"
1244 );
1245 }
1246
1247 #[test]
1248 fn address_range() {
1249 assert_eq!(
1250 run_sed("2,3d", "a\nb\nc\nd\n"),
1251 "a\nd\n"
1252 );
1253 }
1254
1255 #[test]
1256 fn address_negation() {
1257 assert_eq!(
1258 run_sed("2!d", "a\nb\nc\n"),
1259 "b\n"
1260 );
1261 }
1262
1263 #[test]
1264 fn transliterate() {
1265 assert_eq!(
1266 run_sed("y/abc/xyz/", "abc\n"),
1267 "xyz\n"
1268 );
1269 }
1270
1271 #[test]
1272 fn quit_command() {
1273 assert_eq!(run_sed("2q", "a\nb\nc\n"), "a\nb\n");
1274 }
1275
1276 #[test]
1277 fn hold_and_get() {
1278 assert_eq!(
1280 run_sed("1h;2G", "first\nsecond\n"),
1281 "first\nsecond\nfirst\n"
1282 );
1283 }
1284
1285 #[test]
1286 fn exchange() {
1287 assert_eq!(
1288 run_sed("1{h;d};2{x}", "first\nsecond\n"),
1289 "first\n"
1290 );
1291 }
1292
1293 #[test]
1294 fn labels_and_branch() {
1295 assert_eq!(
1297 run_sed(":l\ns/a/b/\nt l", "aaa\n"),
1298 "bbb\n"
1299 );
1300 }
1301
1302 #[test]
1303 fn append_text() {
1304 assert_eq!(
1305 run_sed("1a after first", "first\nsecond\n"),
1306 "first\nafter first\nsecond\n"
1307 );
1308 }
1309
1310 #[test]
1311 fn insert_text() {
1312 assert_eq!(
1313 run_sed("2i before second", "first\nsecond\n"),
1314 "first\nbefore second\nsecond\n"
1315 );
1316 }
1317
1318 #[test]
1319 fn line_number() {
1320 assert_eq!(
1321 run_sed("=", "a\nb\n"),
1322 "1\na\n2\nb\n"
1323 );
1324 }
1325
1326 #[test]
1327 fn multiple_commands() {
1328 assert_eq!(
1329 run_sed("s/a/x/; s/b/y/", "ab\n"),
1330 "xy\n"
1331 );
1332 }
1333
1334 #[test]
1335 fn last_line_address() {
1336 assert_eq!(
1337 run_sed("$d", "a\nb\nc\n"),
1338 "a\nb\n"
1339 );
1340 }
1341
1342 #[test]
1343 fn clear_pattern() {
1344 assert_eq!(run_sed("z", "hello\n"), "\n");
1345 }
1346
1347 #[test]
1348 fn custom_delimiter() {
1349 assert_eq!(
1350 run_sed("s|foo|bar|", "foo\n"),
1351 "bar\n"
1352 );
1353 }
1354
1355 #[test]
1356 fn case_insensitive_sub() {
1357 assert_eq!(
1358 run_sed("s/foo/bar/i", "FOO\n"),
1359 "bar\n"
1360 );
1361 }
1362
1363 #[test]
1364 fn block_with_address() {
1365 assert_eq!(
1366 run_sed("/a/ { s/a/x/; s/$/!/ }", "a\nb\n"),
1367 "x!\nb\n"
1368 );
1369 }
1370
1371 #[test]
1372 fn regex_range_address() {
1373 assert_eq!(
1374 run_sed("/start/,/end/d", "a\nstart\nb\nend\nc\n"),
1375 "a\nc\n"
1376 );
1377 }
1378
1379 #[test]
1380 fn empty_input() {
1381 assert_eq!(run_sed("s/a/b/", ""), "");
1382 }
1383
1384 #[test]
1385 fn passthrough_no_match() {
1386 assert_eq!(
1387 run_sed("s/xyz/abc/", "hello\n"),
1388 "hello\n"
1389 );
1390 }
1391
1392 #[test]
1399 fn n_single_line() {
1400 assert_eq!(run_sed("n", "a\n"), "a\n");
1402 }
1403
1404 #[test]
1405 fn n_two_lines() {
1406 assert_eq!(run_sed("n", "a\nb\n"), "a\nb\n");
1408 }
1409
1410 #[test]
1411 fn n_with_command_after() {
1412 assert_eq!(
1414 run_sed("n;s/b/X/", "a\nb\nc\n"),
1415 "a\nX\nc\n"
1416 );
1417 }
1418
1419 #[test]
1420 fn n_quiet_mode() {
1421 assert_eq!(
1423 run_sed_opts("n;p", "a\nb\nc\n", true),
1424 "b\n"
1425 );
1426 }
1427
1428 #[test]
1431 fn big_n_appends() {
1432 assert_eq!(
1434 run_sed("N;s/\\n/ /", "a\nb\n"),
1435 "a b\n"
1436 );
1437 }
1438
1439 #[test]
1440 fn big_n_at_end() {
1441 assert_eq!(
1443 run_sed("N", "a\nb\nc\n"),
1444 "a\nb\nc\n"
1445 );
1446 }
1447
1448 #[test]
1451 fn big_d_deletes_first_line_of_pattern() {
1452 assert_eq!(
1454 run_sed("N;P;D", "a\nb\nc\n"),
1455 "a\nb\nc\n"
1456 );
1457 }
1458
1459 #[test]
1460 fn big_d_single_line() {
1461 assert_eq!(run_sed("D", "a\nb\n"), "");
1463 }
1464
1465 #[test]
1468 fn change_single_address() {
1469 assert_eq!(
1470 run_sed("2c REPLACED", "a\nb\nc\n"),
1471 "a\nREPLACED\nc\n"
1472 );
1473 }
1474
1475 #[test]
1476 fn change_regex_address() {
1477 assert_eq!(
1478 run_sed("/b/c GONE", "a\nb\nc\n"),
1479 "a\nGONE\nc\n"
1480 );
1481 }
1482
1483 #[test]
1486 fn big_p_first_line() {
1487 assert_eq!(
1489 run_sed("N;P", "first\nsecond\n"),
1490 "first\nfirst\nsecond\n"
1491 );
1492 }
1493
1494 #[test]
1497 fn insert_before_last() {
1498 assert_eq!(
1499 run_sed("$i END", "a\nb\n"),
1500 "a\nEND\nb\n"
1501 );
1502 }
1503
1504 #[test]
1507 fn append_after_match() {
1508 assert_eq!(
1509 run_sed("/b/a AFTER", "a\nb\nc\n"),
1510 "a\nb\nAFTER\nc\n"
1511 );
1512 }
1513
1514 #[test]
1517 fn hold_append_and_get() {
1518 assert_eq!(
1520 run_sed("1H;2G", "first\nsecond\n"),
1521 "first\nsecond\n\nfirst\n"
1522 );
1523 }
1524
1525 #[test]
1526 fn hold_get_replace() {
1527 assert_eq!(
1529 run_sed("1h;2g", "first\nsecond\n"),
1530 "first\nfirst\n"
1531 );
1532 }
1533
1534 #[test]
1535 fn reverse_lines() {
1536 assert_eq!(
1538 run_sed("1!G;h;$!d", "a\nb\nc\n"),
1539 "c\nb\na\n"
1540 );
1541 }
1542
1543 #[test]
1546 fn exchange_basic() {
1547 assert_eq!(
1549 run_sed("x", "a\nb\n"),
1550 "\na\n"
1551 );
1552 }
1553
1554 #[test]
1557 fn range_regex_to_regex() {
1558 assert_eq!(
1559 run_sed("/start/,/end/d", "a\nstart\nb\nend\nc\n"),
1560 "a\nc\n"
1561 );
1562 }
1563
1564 #[test]
1565 fn range_line_to_last() {
1566 assert_eq!(
1567 run_sed("2,$d", "a\nb\nc\nd\n"),
1568 "a\n"
1569 );
1570 }
1571
1572 #[test]
1573 fn range_negated() {
1574 assert_eq!(
1576 run_sed("2,3!d", "a\nb\nc\nd\n"),
1577 "b\nc\n"
1578 );
1579 }
1580
1581 #[test]
1582 fn range_start_equals_end_on_same_line() {
1583 assert_eq!(
1585 run_sed("2,2d", "a\nb\nc\n"),
1586 "a\nc\n"
1587 );
1588 }
1589
1590 #[test]
1591 fn range_regex_no_check_on_start_line() {
1592 assert_eq!(
1595 run_sed_opts(
1596 "/\\[start\\]/,/\\[/p",
1597 "[start]\nfoo\nbar\n[end]\n",
1598 true,
1599 ),
1600 "[start]\nfoo\nbar\n[end]\n"
1601 );
1602 }
1603
1604 #[test]
1607 fn step_even_lines() {
1608 assert_eq!(
1609 run_sed("0~2d", "a\nb\nc\nd\ne\n"),
1610 "a\nc\ne\n"
1611 );
1612 }
1613
1614 #[test]
1615 fn step_odd_lines() {
1616 assert_eq!(
1617 run_sed("1~2d", "a\nb\nc\nd\ne\n"),
1618 "b\nd\n"
1619 );
1620 }
1621
1622 #[test]
1625 fn sub_nth_3() {
1626 assert_eq!(run_sed("s/a/X/3", "aaaaa\n"), "aaXaa\n");
1627 }
1628
1629 #[test]
1630 fn sub_global_and_print() {
1631 assert_eq!(
1632 run_sed_opts("s/a/X/gp", "aaa\nbbb\n", true),
1633 "XXX\n"
1634 );
1635 }
1636
1637 #[test]
1638 fn sub_escaped_delimiter() {
1639 assert_eq!(
1641 run_sed("s/a\\/b/X/", "a/b\n"),
1642 "X\n"
1643 );
1644 }
1645
1646 #[test]
1647 fn sub_newline_in_replacement() {
1648 assert_eq!(
1649 run_sed("s/a/X\\nY/", "a\n"),
1650 "X\nY\n"
1651 );
1652 }
1653
1654 #[test]
1655 fn sub_tab_in_replacement() {
1656 assert_eq!(
1657 run_sed("s/a/X\\tY/", "a\n"),
1658 "X\tY\n"
1659 );
1660 }
1661
1662 #[test]
1663 fn sub_literal_ampersand() {
1664 assert_eq!(
1666 run_sed("s/foo/\\&/", "foo\n"),
1667 "&\n"
1668 );
1669 }
1670
1671 #[test]
1672 fn sub_literal_backslash() {
1673 assert_eq!(
1675 run_sed("s/a/\\\\/", "a\n"),
1676 "\\\n"
1677 );
1678 }
1679
1680 #[test]
1683 fn branch_unconditional() {
1684 assert_eq!(
1686 run_sed("b skip;d;:skip", "hello\n"),
1687 "hello\n"
1688 );
1689 }
1690
1691 #[test]
1692 fn branch_unconditional_no_label() {
1693 assert_eq!(
1695 run_sed("b\nd", "hello\n"),
1696 "hello\n"
1697 );
1698 }
1699
1700 #[test]
1701 fn branch_if_sub_no_match() {
1702 assert_eq!(
1704 run_sed("s/x/y/;t end;s/a/X/;:end", "abc\n"),
1705 "Xbc\n"
1706 );
1707 }
1708
1709 #[test]
1710 fn branch_if_not_sub() {
1711 assert_eq!(
1713 run_sed("s/x/y/;T skip;s/a/SHOULD_NOT/;:skip", "abc\n"),
1714 "abc\n"
1715 );
1716 }
1717
1718 #[test]
1721 fn list_command() {
1722 assert_eq!(
1723 run_sed("l", "a\tb\n"),
1724 "a\\tb$\na\tb\n"
1725 );
1726 }
1727
1728 #[test]
1731 fn line_number_with_address() {
1732 assert_eq!(
1733 run_sed("2=", "a\nb\nc\n"),
1734 "a\n2\nb\nc\n"
1735 );
1736 }
1737
1738 #[test]
1741 fn clear_pattern_in_block() {
1742 assert_eq!(
1743 run_sed("/hello/{z;s/$/EMPTY/}", "hello\nworld\n"),
1744 "EMPTY\nworld\n"
1745 );
1746 }
1747
1748 #[test]
1751 fn strip_html_tags() {
1752 assert_eq!(
1753 run_sed("s/<[^>]*>//g", "<b>bold</b>\n"),
1754 "bold\n"
1755 );
1756 }
1757
1758 #[test]
1759 fn sed_multiline_join() {
1760 assert_eq!(
1762 run_sed(":a;N;s/\\n/ /;$!b a", "one\ntwo\nthree\n"),
1763 "one two three\n"
1764 );
1765 }
1766
1767 #[test]
1768 fn double_space() {
1769 assert_eq!(
1771 run_sed("G", "a\nb\n"),
1772 "a\n\nb\n\n"
1773 );
1774 }
1775
1776 #[test]
1777 fn delete_empty_lines() {
1778 assert_eq!(
1779 run_sed("/^$/d", "a\n\nb\n\nc\n"),
1780 "a\nb\nc\n"
1781 );
1782 }
1783
1784 #[test]
1785 fn multiple_expressions() {
1786 assert_eq!(
1788 run_sed("s/a/x/\ns/b/y/", "ab\n"),
1789 "xy\n"
1790 );
1791 }
1792
1793 #[test]
1794 fn nested_blocks() {
1795 assert_eq!(
1796 run_sed("1{/a/{s/a/X/}}", "abc\ndef\n"),
1797 "Xbc\ndef\n"
1798 );
1799 }
1800
1801 #[test]
1804 fn line_with_no_trailing_newline() {
1805 assert_eq!(run_sed("s/a/X/", "abc"), "Xbc\n");
1807 }
1808
1809 #[test]
1810 fn single_empty_line() {
1811 assert_eq!(run_sed("s/^$/EMPTY/", "\n"), "EMPTY\n");
1812 }
1813
1814 #[test]
1815 fn substitute_with_empty_replacement() {
1816 assert_eq!(run_sed("s/foo//", "foobar\n"), "bar\n");
1817 }
1818
1819 #[test]
1820 fn substitute_with_empty_pattern_match() {
1821 assert_eq!(run_sed("s/^/PREFIX: /", "hello\n"), "PREFIX: hello\n");
1823 }
1824
1825 #[test]
1826 fn multiple_ranges_interleaved() {
1827 let input = "a\nb\nc\nd\ne\nf\n";
1829 assert_eq!(
1830 run_sed("2,3s/./X/;5,6s/./Y/", input),
1831 "a\nX\nX\nd\nY\nY\n"
1832 );
1833 }
1834
1835 #[test]
1836 fn regex_special_chars() {
1837 assert_eq!(
1838 run_sed("s/\\./X/g", "a.b.c\n"),
1839 "aXbXc\n"
1840 );
1841 }
1842
1843 #[test]
1844 fn regex_anchors() {
1845 assert_eq!(
1846 run_sed("s/^/> /", "hello\n"),
1847 "> hello\n"
1848 );
1849 assert_eq!(
1850 run_sed("s/$/ </", "hello\n"),
1851 "hello <\n"
1852 );
1853 }
1854
1855 #[test]
1858 fn quit_prints_current_line() {
1859 assert_eq!(run_sed("2q", "a\nb\nc\n"), "a\nb\n");
1861 }
1862
1863 #[test]
1864 fn quit_no_print() {
1865 assert_eq!(run_sed("2Q", "a\nb\nc\n"), "a\n");
1867 }
1868
1869 #[test]
1872 fn remove_trailing_whitespace() {
1873 assert_eq!(
1874 run_sed("s/[ \t]*$//", "hello \nworld\t\t\n"),
1875 "hello\nworld\n"
1876 );
1877 }
1878
1879 #[test]
1880 fn number_lines() {
1881 assert_eq!(
1883 run_sed("=;s/^/ /", "a\nb\n"),
1884 "1\n a\n2\n b\n"
1885 );
1886 }
1887
1888 #[test]
1889 fn print_only_matches() {
1890 assert_eq!(
1892 run_sed_opts("/foo/p", "foo\nbar\nfoo2\n", true),
1893 "foo\nfoo2\n"
1894 );
1895 }
1896
1897 #[test]
1898 fn delete_first_and_last() {
1899 assert_eq!(
1900 run_sed("1d;$d", "a\nb\nc\n"),
1901 "b\n"
1902 );
1903 }
1904
1905 #[test]
1906 fn change_every_matching_line() {
1907 assert_eq!(
1908 run_sed("/old/c new", "old\nkeep\nold\n"),
1909 "new\nkeep\nnew\n"
1910 );
1911 }
1912}