Skip to main content

sed_rs/
engine.rs

1// The sed execution engine.
2//
3// Compiles parsed commands into an internal representation with pre-compiled
4// regexes, then processes input line-by-line applying the sed cycle:
5//   1. Read a line into the pattern space
6//   2. Execute all commands
7//   3. Unless -n, print the pattern space
8//   4. Flush any queued output (from a, r, etc.)
9
10use std::collections::HashMap;
11use std::fs;
12use std::io::{self, BufRead, Write};
13use std::path::Path;
14
15use regex::Regex;
16
17use crate::cli::Options;
18use crate::command::{Address, AddressRange, Command, SedCommand};
19use crate::error::{Error, Result};
20
21// ---------------------------------------------------------------------------
22// Compiled command representation
23// ---------------------------------------------------------------------------
24
/// A single sed address with any regex already compiled.
enum CompiledAddress {
    /// Matches when the current line number equals this value.
    Line(usize),
    /// Matches when the line reader reported the current line as the last one.
    Last,
    /// Matches when the regex matches the current pattern space.
    Regex(Regex),
    /// Stepped address: a `step` of 0 matches only line `first`; otherwise it
    /// matches line `first` and every `step`-th line after it (with `first`
    /// equal to 0, every line number divisible by `step`).
    Step { first: usize, step: usize },
}
31
/// The address part of a command: which lines it selects, optionally inverted.
struct CompiledAddressRange {
    /// Shape of the address: none, a single address, or a start/end pair.
    kind: CompiledAddressKind,
    /// When true, the result of matching `kind` is inverted.
    negated: bool,
}
36
/// The shapes an address specification can take.
enum CompiledAddressKind {
    /// No address given: the command applies to every line.
    None,
    /// One address: the command applies to lines matching it.
    Single(CompiledAddress),
    /// A `(start, end)` pair: the command applies from a line matching
    /// `start` through a line matching `end`, inclusive.
    Range(CompiledAddress, CompiledAddress),
}
42
43impl CompiledAddressRange {
44    fn none() -> Self {
45        Self {
46            kind: CompiledAddressKind::None,
47            negated: false,
48        }
49    }
50}
51
/// A substitute command with its pattern compiled and its flags decoded.
struct CompiledSubstitute {
    /// Regex searched for in the pattern space.
    pattern: Regex,
    /// sed-style replacement text (`&`, `\1`..`\9`, `\n`, ... are interpreted
    /// when the substitution is applied).
    replacement: String,
    /// Replace more than just the first match.
    global: bool,
    /// After a successful substitution, write the pattern space to the output.
    print: bool,
    /// When set, selects which numbered match (1-based) is replaced.
    nth: Option<usize>,
    /// When set, the pattern space is appended to this file after a
    /// successful substitution.
    write_file: Option<String>,
}
60
/// One executable sed command after compilation.
///
/// The per-variant notes describe what the engine does when it executes the
/// variant.
enum CompiledCommand {
    /// Opens a command group; when its address does not match, everything up
    /// to the matching `ScopeEnd` is skipped.
    ScopeStart,
    /// Closes the innermost open command group.
    ScopeEnd,
    /// Regex substitution on the pattern space.
    Substitute(CompiledSubstitute),
    /// Replaces each character found in `from` with the character at the same
    /// index in `to`.
    Transliterate { from: Vec<char>, to: Vec<char> },
    /// Writes the pattern space.
    Print,
    /// Writes the pattern space up to (not including) its first newline.
    PrintFirstLine,
    /// Writes the current line number.
    PrintLineNumber,
    /// Writes the pattern space in escaped form followed by `$`.
    List,
    /// Ends the cycle without the automatic print (`d`).
    Delete,
    /// Removes text through the first newline and re-runs the script without
    /// reading input (`D`); with no newline present it behaves like `Delete`.
    DeleteFirstLine,
    /// Writes the pattern space (unless quiet) and replaces it with the next
    /// input line.
    Next,
    /// Appends a newline and the next input line to the pattern space.
    NextAppend,
    /// Copies the pattern space into the hold space.
    HoldReplace,
    /// Appends a newline and the pattern space to the hold space.
    HoldAppend,
    /// Copies the hold space into the pattern space.
    GetReplace,
    /// Appends a newline and the hold space to the pattern space.
    GetAppend,
    /// Swaps the pattern space and the hold space.
    Exchange,
    /// Queues text to be written when the append queue is flushed.
    Append(String),
    /// Writes text immediately.
    Insert(String),
    /// Writes text and ends the cycle without the automatic print.
    Change(String),
    // The label name is not read when the command is executed (branches are
    // resolved through `Engine::labels`), hence the dead-code allowance.
    #[allow(dead_code)]
    /// Branch target marker; executing it does nothing.
    Label(String),
    /// Unconditional branch to the named label, or to the end of the script
    /// when no label is given.
    Branch(Option<String>),
    /// Branches if a substitution has succeeded (and clears that flag).
    BranchIfSub(Option<String>),
    /// Branches if no substitution has succeeded.
    BranchIfNotSub(Option<String>),
    /// Queues the contents of the named file for output; unreadable files are
    /// ignored.
    ReadFile(String),
    /// Appends the pattern space to the named file.
    WriteFile(String),
    /// Stops processing after printing the pattern space (unless quiet);
    /// the optional value is the exit code (default 0).
    Quit(Option<i32>),
    /// Stops processing without printing; the optional value is the exit
    /// code (default 0).
    QuitNoprint(Option<i32>),
    /// Empties the pattern space.
    ClearPattern,
    /// Does nothing.
    Noop,
}
94
/// A compiled command paired with the address that decides whether it runs.
struct CompiledSedCommand {
    /// Lines on which `command` is executed.
    address: CompiledAddressRange,
    /// The action to perform.
    command: CompiledCommand,
}
99
100// ---------------------------------------------------------------------------
101// Flow control
102// ---------------------------------------------------------------------------
103
/// Outcome of executing a command (or a whole script pass), telling the
/// caller how to proceed.
enum Flow {
    /// Continue to next command
    Continue,
    /// End the cycle without the automatic print and read the next line
    /// (from `d`, from a change command, and from `D` when the pattern space
    /// holds no newline)
    Restart,
    /// Restart script on current pattern space without reading (from `D`)
    RestartScript,
    /// Branch to a label or end of script (`None` means end of script)
    Branch(Option<String>),
    /// Quit processing after the usual end-of-cycle output; carries the
    /// exit code
    Quit(i32),
    /// Quit without printing; carries the exit code
    QuitNoPrint(i32),
}
118
119// ---------------------------------------------------------------------------
120// Engine
121// ---------------------------------------------------------------------------
122
/// A compiled sed script together with the options that affect execution.
pub struct Engine {
    /// Commands in script order.
    commands: Vec<CompiledSedCommand>,
    /// Maps a label name to the command index a branch jumps to.
    labels: HashMap<String, usize>,
    /// Suppresses the automatic print of the pattern space at end of cycle.
    quiet: bool,
    /// Records are NUL-delimited instead of newline-delimited.
    null_data: bool,
}
129
/// Mutable state during execution
struct State {
    /// 1-based number of the most recently read input line.
    line_number: usize,
    /// Whether the most recently read line was reported as the last one.
    is_last_line: bool,
    /// The text commands operate on.
    pattern_space: String,
    /// Auxiliary buffer used by the hold/get/exchange commands.
    hold_space: String,
    /// Text queued for output after the pattern space is printed.
    append_queue: Vec<String>,
    /// Set by a successful substitution; cleared at the start of each script
    /// pass and when a conditional branch is taken.
    last_sub_success: bool,
    /// Tracks whether each range-addressed command is currently "in range"
    /// (indexed by command position)
    range_active: Vec<bool>,
}
141
142impl State {
143    fn new(num_commands: usize) -> Self {
144        Self {
145            line_number: 0,
146            is_last_line: false,
147            pattern_space: String::new(),
148            hold_space: String::new(),
149            append_queue: Vec::new(),
150            last_sub_success: false,
151            range_active: vec![false; num_commands],
152        }
153    }
154}
155
156impl Engine {
157    pub fn new(
158        parsed: Vec<SedCommand>,
159        options: &Options,
160    ) -> Result<Self> {
161        let (commands, labels) = compile_commands(parsed)?;
162        Ok(Self {
163            commands,
164            labels,
165            quiet: options.quiet,
166            null_data: options.null_data,
167        })
168    }
169
170    /// Write a string followed by the appropriate line terminator
171    /// (NUL in -z mode, newline otherwise).
172    fn write_line<W: Write>(
173        &self,
174        writer: &mut W,
175        s: &str,
176    ) -> Result<()> {
177        write!(writer, "{}", s)?;
178        if self.null_data {
179            writer.write_all(b"\0")?;
180        } else {
181            writer.write_all(b"\n")?;
182        }
183        Ok(())
184    }
185
186    /// Process files (or stdin if empty) and write results to stdout.
187    pub fn run(&self, files: &[std::path::PathBuf]) -> Result<()> {
188        let stdout = io::stdout();
189        let mut out = io::BufWriter::new(stdout.lock());
190
191        if files.is_empty() {
192            let stdin = io::stdin();
193            let reader = stdin.lock();
194            self.process_stream(reader, &mut out)?;
195        } else {
196            for path in files {
197                if !path.exists() {
198                    eprintln!(
199                        "sed: can't read {}: No such file or directory",
200                        path.display()
201                    );
202                    continue;
203                }
204                let file = fs::File::open(path)?;
205                let reader = io::BufReader::new(file);
206                self.process_stream(reader, &mut out)?;
207            }
208        }
209
210        out.flush()?;
211        Ok(())
212    }
213
214    /// Process files in-place, optionally creating backups.
215    pub fn run_in_place(
216        &self,
217        files: &[std::path::PathBuf],
218        backup_suffix: &str,
219    ) -> Result<()> {
220        for path in files {
221            if !path.exists() {
222                eprintln!(
223                    "sed: can't read {}: No such file or directory",
224                    path.display()
225                );
226                continue;
227            }
228
229            // Read the file
230            let file = fs::File::open(path)?;
231            let reader = io::BufReader::new(file);
232            let mut output = Vec::new();
233            {
234                let mut cursor = io::Cursor::new(&mut output);
235                self.process_stream(reader, &mut cursor)?;
236            }
237
238            // Create backup if suffix is non-empty
239            if !backup_suffix.is_empty() {
240                let backup_path = format!(
241                    "{}{}",
242                    path.display(),
243                    backup_suffix
244                );
245                fs::copy(path, &backup_path)?;
246            }
247
248            // Write atomically using tempfile (adapted from sd)
249            write_atomic(path, &output)?;
250        }
251        Ok(())
252    }
253
254    /// Process input from a buffered reader and write output to a writer.
255    ///
256    /// This is the core sed execution loop. It reads lines (or NUL-delimited
257    /// records in `-z` mode), applies the compiled commands, and writes the
258    /// results.
259    pub fn process_stream<R: BufRead, W: Write>(
260        &self,
261        reader: R,
262        writer: &mut W,
263    ) -> Result<()> {
264        let mut line_reader = LineReader::new(reader, self.null_data);
265        let mut state = State::new(self.commands.len());
266
267        while let Some((line, is_last)) = line_reader.read_line()? {
268            state.line_number += 1;
269            state.is_last_line = is_last;
270            state.pattern_space = line;
271            state.append_queue.clear();
272
273            // Inner loop: allows `D` to re-run the script on the
274            // remaining pattern space without reading a new input line.
275            loop {
276                match self.execute_all(
277                    &mut state,
278                    &mut line_reader,
279                    writer,
280                )? {
281                    Flow::Restart => {
282                        // `d` command: skip printing, flush appends,
283                        // break to read next line
284                        self.flush_appends(&state, writer)?;
285                        break;
286                    }
287                    Flow::RestartScript => {
288                        // `D` command: re-run the script on the
289                        // remaining pattern space (no new read)
290                        state.append_queue.clear();
291                        continue;
292                    }
293                    Flow::Quit(code) => {
294                        // Print pattern space then quit
295                        if !self.quiet {
296                            self.write_line(
297                                writer,
298                                &state.pattern_space,
299                            )?;
300                        }
301                        self.flush_appends(&state, writer)?;
302                        if code != 0 {
303                            std::process::exit(code);
304                        }
305                        return Ok(());
306                    }
307                    Flow::QuitNoPrint(code) => {
308                        if code != 0 {
309                            std::process::exit(code);
310                        }
311                        return Ok(());
312                    }
313                    Flow::Continue | Flow::Branch(_) => {
314                        if !self.quiet {
315                            self.write_line(
316                                writer,
317                                &state.pattern_space,
318                            )?;
319                        }
320                        self.flush_appends(&state, writer)?;
321                        break;
322                    }
323                }
324            }
325        }
326
327        Ok(())
328    }
329
330    fn execute_all<R: BufRead, W: Write>(
331        &self,
332        state: &mut State,
333        line_reader: &mut LineReader<R>,
334        writer: &mut W,
335    ) -> Result<Flow> {
336        let mut pc: usize = 0;
337        let mut scope_depth: usize = 0;
338        let mut skip_depth: Option<usize> = None;
339
340        // Reset sub success flag at start of each cycle
341        state.last_sub_success = false;
342
343        while pc < self.commands.len() {
344            let cmd = &self.commands[pc];
345
346            // -- Scope tracking --
347            if let Some(sd) = skip_depth {
348                match &cmd.command {
349                    CompiledCommand::ScopeStart => {
350                        scope_depth += 1;
351                    }
352                    CompiledCommand::ScopeEnd => {
353                        if scope_depth == sd {
354                            skip_depth = None;
355                        }
356                        scope_depth = scope_depth.saturating_sub(1);
357                    }
358                    _ => {}
359                }
360                pc += 1;
361                continue;
362            }
363
364            match &cmd.command {
365                CompiledCommand::ScopeStart => {
366                    scope_depth += 1;
367                    if !self.address_matches(
368                        &cmd.address,
369                        state,
370                        pc,
371                    ) {
372                        skip_depth = Some(scope_depth);
373                    }
374                    pc += 1;
375                    continue;
376                }
377                CompiledCommand::ScopeEnd => {
378                    scope_depth = scope_depth.saturating_sub(1);
379                    pc += 1;
380                    continue;
381                }
382                _ => {}
383            }
384
385            // Check if this command's address matches
386            if !self.address_matches(&cmd.address, state, pc) {
387                pc += 1;
388                continue;
389            }
390
391            // Execute the command
392            let flow = self.execute_one(
393                &cmd.command,
394                state,
395                line_reader,
396                writer,
397            )?;
398
399            match flow {
400                Flow::Continue => {
401                    pc += 1;
402                }
403                Flow::Restart => return Ok(Flow::Restart),
404                Flow::RestartScript => {
405                    return Ok(Flow::RestartScript)
406                }
407                Flow::Quit(c) => return Ok(Flow::Quit(c)),
408                Flow::QuitNoPrint(c) => return Ok(Flow::QuitNoPrint(c)),
409                Flow::Branch(ref label) => {
410                    if let Some(name) = label {
411                        if let Some(&target) = self.labels.get(name) {
412                            pc = target;
413                            continue;
414                        }
415                    }
416                    // Branch to end of script
417                    return Ok(Flow::Continue);
418                }
419            }
420        }
421
422        Ok(Flow::Continue)
423    }
424
425    fn execute_one<R: BufRead, W: Write>(
426        &self,
427        command: &CompiledCommand,
428        state: &mut State,
429        line_reader: &mut LineReader<R>,
430        writer: &mut W,
431    ) -> Result<Flow> {
432        match command {
433            CompiledCommand::Substitute(sub) => {
434                let (result, matched) = apply_substitution(
435                    &sub.pattern,
436                    &state.pattern_space,
437                    &sub.replacement,
438                    sub.global,
439                    sub.nth,
440                );
441                if matched {
442                    state.pattern_space = result;
443                    state.last_sub_success = true;
444                    if sub.print {
445                        self.write_line(
446                            writer,
447                            &state.pattern_space,
448                        )?;
449                    }
450                    if let Some(ref path) = sub.write_file {
451                        let mut f = fs::OpenOptions::new()
452                            .create(true)
453                            .append(true)
454                            .open(path)?;
455                        writeln!(f, "{}", state.pattern_space)?;
456                    }
457                }
458            }
459
460            CompiledCommand::Transliterate { from, to } => {
461                let mut new = String::with_capacity(
462                    state.pattern_space.len(),
463                );
464                for c in state.pattern_space.chars() {
465                    if let Some(pos) = from.iter().position(|&f| f == c) {
466                        new.push(to[pos]);
467                    } else {
468                        new.push(c);
469                    }
470                }
471                state.pattern_space = new;
472            }
473
474            CompiledCommand::Delete => return Ok(Flow::Restart),
475
476            CompiledCommand::DeleteFirstLine => {
477                if let Some(pos) = state.pattern_space.find('\n') {
478                    state.pattern_space =
479                        state.pattern_space[pos + 1..].to_string();
480                    // Re-run the script on the remaining pattern space
481                    // without reading a new input line
482                    return Ok(Flow::RestartScript);
483                } else {
484                    // No newline: same as `d` — discard and read next
485                    return Ok(Flow::Restart);
486                }
487            }
488
489            CompiledCommand::Print => {
490                self.write_line(writer, &state.pattern_space)?;
491            }
492
493            CompiledCommand::PrintFirstLine => {
494                if let Some(pos) = state.pattern_space.find('\n') {
495                    self.write_line(
496                        writer,
497                        &state.pattern_space[..pos],
498                    )?;
499                } else {
500                    self.write_line(writer, &state.pattern_space)?;
501                }
502            }
503
504            CompiledCommand::PrintLineNumber => {
505                self.write_line(
506                    writer,
507                    &state.line_number.to_string(),
508                )?;
509            }
510
511            CompiledCommand::List => {
512                // Print pattern space with non-printing chars shown
513                let listed = list_escape(&state.pattern_space);
514                self.write_line(writer, &format!("{}$", listed))?;
515            }
516
517            CompiledCommand::Next => {
518                // Print current pattern space (if not -n)
519                if !self.quiet {
520                    self.write_line(writer, &state.pattern_space)?;
521                }
522                self.flush_appends(state, writer)?;
523
524                // Read next line
525                if let Some((line, is_last)) = line_reader.read_line()? {
526                    state.line_number += 1;
527                    state.is_last_line = is_last;
528                    state.pattern_space = line;
529                } else {
530                    // No more input — we already printed, so exit
531                    // without the auto-print at end of cycle
532                    return Ok(Flow::QuitNoPrint(0));
533                }
534            }
535
536            CompiledCommand::NextAppend => {
537                if let Some((line, is_last)) = line_reader.read_line()? {
538                    state.line_number += 1;
539                    state.is_last_line = is_last;
540                    state.pattern_space.push('\n');
541                    state.pattern_space.push_str(&line);
542                } else {
543                    // Default: print and quit if no more input
544                    if !self.quiet {
545                        self.write_line(
546                            writer,
547                            &state.pattern_space,
548                        )?;
549                    }
550                    return Ok(Flow::QuitNoPrint(0));
551                }
552            }
553
554            CompiledCommand::HoldReplace => {
555                state.hold_space = state.pattern_space.clone();
556            }
557            CompiledCommand::HoldAppend => {
558                state.hold_space.push('\n');
559                state.hold_space.push_str(&state.pattern_space);
560            }
561            CompiledCommand::GetReplace => {
562                state.pattern_space = state.hold_space.clone();
563            }
564            CompiledCommand::GetAppend => {
565                state.pattern_space.push('\n');
566                let hold = state.hold_space.clone();
567                state.pattern_space.push_str(&hold);
568            }
569            CompiledCommand::Exchange => {
570                std::mem::swap(
571                    &mut state.pattern_space,
572                    &mut state.hold_space,
573                );
574            }
575
576            CompiledCommand::Append(text) => {
577                state.append_queue.push(text.clone());
578            }
579            CompiledCommand::Insert(text) => {
580                self.write_line(writer, text)?;
581            }
582            CompiledCommand::Change(text) => {
583                // Output the replacement text, then restart the cycle
584                // (skipping the auto-print of the pattern space)
585                self.write_line(writer, text)?;
586                return Ok(Flow::Restart);
587            }
588
589            CompiledCommand::Branch(label) => {
590                return Ok(Flow::Branch(label.clone()));
591            }
592            CompiledCommand::BranchIfSub(label) => {
593                if state.last_sub_success {
594                    state.last_sub_success = false;
595                    return Ok(Flow::Branch(label.clone()));
596                }
597            }
598            CompiledCommand::BranchIfNotSub(label) => {
599                if !state.last_sub_success {
600                    state.last_sub_success = false;
601                    return Ok(Flow::Branch(label.clone()));
602                }
603            }
604
605            CompiledCommand::ReadFile(path) => {
606                if let Ok(content) = fs::read_to_string(path) {
607                    state.append_queue.push(content.trim_end().to_string());
608                }
609            }
610            CompiledCommand::WriteFile(path) => {
611                let mut f = fs::OpenOptions::new()
612                    .create(true)
613                    .append(true)
614                    .open(path)?;
615                writeln!(f, "{}", state.pattern_space)?;
616            }
617
618            CompiledCommand::Quit(code) => {
619                return Ok(Flow::Quit(code.unwrap_or(0)));
620            }
621            CompiledCommand::QuitNoprint(code) => {
622                return Ok(Flow::QuitNoPrint(code.unwrap_or(0)));
623            }
624            CompiledCommand::ClearPattern => {
625                state.pattern_space.clear();
626            }
627
628            CompiledCommand::Label(_)
629            | CompiledCommand::Noop
630            | CompiledCommand::ScopeStart
631            | CompiledCommand::ScopeEnd => {}
632        }
633
634        Ok(Flow::Continue)
635    }
636
637    fn address_matches(
638        &self,
639        addr: &CompiledAddressRange,
640        state: &mut State,
641        cmd_index: usize,
642    ) -> bool {
643        let raw = match &addr.kind {
644            CompiledAddressKind::None => true,
645            CompiledAddressKind::Single(a) => {
646                addr_matches_line(a, state)
647            }
648            CompiledAddressKind::Range(start, end) => {
649                let active = state
650                    .range_active
651                    .get(cmd_index)
652                    .copied()
653                    .unwrap_or(false);
654
655                if active {
656                    // We're in the range; check if end matches
657                    if addr_matches_line(end, state) {
658                        // End of range — still in range for this line
659                        if let Some(v) =
660                            state.range_active.get_mut(cmd_index)
661                        {
662                            *v = false;
663                        }
664                    }
665                    true
666                } else if addr_matches_line(start, state) {
667                    // Start of range — activate it.
668                    if let Some(v) =
669                        state.range_active.get_mut(cmd_index)
670                    {
671                        *v = true;
672                    }
673                    // Per POSIX/GNU: regex end addresses are NOT
674                    // checked on the start line. But line-number end
675                    // addresses that are already at or past the
676                    // current line close the range immediately (GNU
677                    // extension: addr2 <= addr1 means one-line range).
678                    if is_line_addr_at_or_before(end, state) {
679                        if let Some(v) =
680                            state.range_active.get_mut(cmd_index)
681                        {
682                            *v = false;
683                        }
684                    }
685                    true
686                } else {
687                    false
688                }
689            }
690        };
691
692        if addr.negated { !raw } else { raw }
693    }
694
695    fn flush_appends<W: Write>(
696        &self,
697        state: &State,
698        writer: &mut W,
699    ) -> Result<()> {
700        for text in &state.append_queue {
701            self.write_line(writer, text)?;
702        }
703        Ok(())
704    }
705}
706
707// ---------------------------------------------------------------------------
708// Address matching helper
709// ---------------------------------------------------------------------------
710
711/// Check if the address is a line number at or before the current line.
712/// Used for immediate range closure when the end address is a line number.
713fn is_line_addr_at_or_before(
714    addr: &CompiledAddress,
715    state: &State,
716) -> bool {
717    match addr {
718        CompiledAddress::Line(n) => *n <= state.line_number,
719        _ => false,
720    }
721}
722
723fn addr_matches_line(addr: &CompiledAddress, state: &State) -> bool {
724    match addr {
725        CompiledAddress::Line(n) => state.line_number == *n,
726        CompiledAddress::Last => state.is_last_line,
727        CompiledAddress::Regex(re) => re.is_match(&state.pattern_space),
728        CompiledAddress::Step { first, step } => {
729            if *step == 0 {
730                state.line_number == *first
731            } else if *first == 0 {
732                state.line_number % step == 0
733            } else {
734                state.line_number >= *first
735                    && (state.line_number - first) % step == 0
736            }
737        }
738    }
739}
740
741// ---------------------------------------------------------------------------
742// Substitution (inspired by sd's replacer)
743// ---------------------------------------------------------------------------
744
745/// Apply a sed-style substitution. Returns the new string and whether any
746/// replacement was made.
747fn apply_substitution(
748    regex: &Regex,
749    text: &str,
750    replacement: &str,
751    global: bool,
752    nth: Option<usize>,
753) -> (String, bool) {
754    let mut result = String::with_capacity(text.len());
755    let mut last_end = 0;
756    let mut match_count: usize = 0;
757    let mut made_substitution = false;
758
759    for captures in regex.captures_iter(text) {
760        let whole = captures.get(0).unwrap();
761        match_count += 1;
762
763        let should_replace = match nth {
764            Some(n) => match_count == n,
765            None => global || match_count == 1,
766        };
767
768        if should_replace {
769            result.push_str(&text[last_end..whole.start()]);
770            apply_sed_replacement(&captures, replacement, &mut result);
771            last_end = whole.end();
772            made_substitution = true;
773            if !global {
774                break;
775            }
776        } else if !global && nth.is_none() {
777            break;
778        }
779    }
780
781    result.push_str(&text[last_end..]);
782    (result, made_substitution)
783}
784
785/// Interpret a sed-style replacement string against captures.
786///
787/// Handles:
788///   &       → whole match ($0)
789///   \1..\9  → numbered capture group
790///   \n      → newline
791///   \t      → tab
792///   \\      → literal backslash
793///   \&      → literal &
794fn apply_sed_replacement(
795    captures: &regex::Captures,
796    replacement: &str,
797    output: &mut String,
798) {
799    let mut chars = replacement.chars().peekable();
800    while let Some(c) = chars.next() {
801        match c {
802            '\\' => {
803                if let Some(&next) = chars.peek() {
804                    match next {
805                        '0'..='9' => {
806                            chars.next();
807                            let group = (next as u8 - b'0') as usize;
808                            if let Some(m) = captures.get(group) {
809                                output.push_str(m.as_str());
810                            }
811                        }
812                        'n' => {
813                            chars.next();
814                            output.push('\n');
815                        }
816                        't' => {
817                            chars.next();
818                            output.push('\t');
819                        }
820                        '\\' => {
821                            chars.next();
822                            output.push('\\');
823                        }
824                        '&' => {
825                            chars.next();
826                            output.push('&');
827                        }
828                        _ => {
829                            // Not a recognized escape; pass through
830                            output.push('\\');
831                        }
832                    }
833                } else {
834                    output.push('\\');
835                }
836            }
837            '&' => {
838                if let Some(m) = captures.get(0) {
839                    output.push_str(m.as_str());
840                }
841            }
842            _ => output.push(c),
843        }
844    }
845}
846
847// ---------------------------------------------------------------------------
848// List escaping (for `l` command)
849// ---------------------------------------------------------------------------
850
/// Renders `s` in the unambiguous form used by the `l` command.
///
/// A backslash becomes `\\`; alert, backspace, form feed, newline, carriage
/// return, tab and vertical tab become `\a`, `\b`, `\f`, `\n`, `\r`, `\t`
/// and `\v`. Every other control character is written as one three-digit
/// octal escape (`\ooo`) per byte of its UTF-8 encoding, as POSIX specifies
/// for `l` (so ESC is `\033`). All remaining characters are copied as-is.
///
/// The trailing `$` is added by the caller. Long lines are not folded.
fn list_escape(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut utf8 = [0u8; 4];
    for c in s.chars() {
        match c {
            '\\' => out.push_str("\\\\"),
            '\x07' => out.push_str("\\a"),
            '\x08' => out.push_str("\\b"),
            '\x0C' => out.push_str("\\f"),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            '\x0B' => out.push_str("\\v"),
            c if c.is_control() => {
                for &byte in c.encode_utf8(&mut utf8).as_bytes() {
                    // Three octal digits: bits 7-6, 5-3, 2-0.
                    out.push('\\');
                    out.push(char::from(b'0' + (byte >> 6)));
                    out.push(char::from(b'0' + ((byte >> 3) & 7)));
                    out.push(char::from(b'0' + (byte & 7)));
                }
            }
            c => out.push(c),
        }
    }
    out
}
871
872// ---------------------------------------------------------------------------
873// Line reader with lookahead (to detect last line for $ address)
874// ---------------------------------------------------------------------------
875
/// Record reader that always holds one record of lookahead, so each record
/// it hands out can be flagged as the last one or not.
struct LineReader<R: BufRead> {
    /// Underlying input.
    reader: R,
    /// Scratch buffer reused for each read from `reader`.
    buf: String,
    /// The record read ahead of the caller; returned by the next
    /// `read_line` call. `None` once there is nothing left to return.
    pending: Option<String>,
    /// Set when the lookahead found no further record.
    exhausted: bool,
    /// Records are NUL-delimited instead of newline-delimited.
    null_data: bool,
}
883
884impl<R: BufRead> LineReader<R> {
885    fn new(mut reader: R, null_data: bool) -> Self {
886        let mut buf = String::new();
887        let pending = if null_data {
888            read_until_null(&mut reader, &mut buf)
889        } else {
890            match reader.read_line(&mut buf) {
891                Ok(0) => None,
892                Ok(_) => Some(chomp(&buf)),
893                Err(_) => None,
894            }
895        };
896
897        let exhausted = pending.is_none();
898        Self {
899            reader,
900            buf: String::new(),
901            pending,
902            exhausted,
903            null_data,
904        }
905    }
906
907    /// Returns (line_content, is_last_line)
908    fn read_line(&mut self) -> Result<Option<(String, bool)>> {
909        let current = match self.pending.take() {
910            Some(line) => line,
911            None => return Ok(None),
912        };
913
914        // Read-ahead to determine if `current` is the last line
915        self.buf.clear();
916        let next = if self.null_data {
917            read_until_null(&mut self.reader, &mut self.buf)
918        } else {
919            match self.reader.read_line(&mut self.buf) {
920                Ok(0) => None,
921                Ok(_) => Some(chomp(&self.buf)),
922                Err(e) => return Err(Error::Io(e)),
923            }
924        };
925
926        let is_last = next.is_none();
927        self.pending = next;
928        self.exhausted = is_last;
929
930        Ok(Some((current, is_last)))
931    }
932}
933
/// Return `s` without its line terminator.
///
/// Exactly one trailing `\n` is dropped, together with a `\r` directly in
/// front of it. A `\r` that is not followed by `\n` is kept.
fn chomp(s: &str) -> String {
    let body = match s.strip_suffix('\n') {
        Some(without_lf) => without_lf.strip_suffix('\r').unwrap_or(without_lf),
        None => s,
    };
    body.to_owned()
}
945
/// Read one NUL-terminated record for -z/--null-data mode.
///
/// Returns the record without its trailing NUL, decoded lossily as UTF-8,
/// or `None` when nothing more could be read. A read error is reported the
/// same way as end of input. `buf` is only cleared; the record is returned
/// as a fresh `String`.
fn read_until_null<R: BufRead>(
    reader: &mut R,
    buf: &mut String,
) -> Option<String> {
    buf.clear();

    let mut record = Vec::new();
    let bytes_read = reader.read_until(b'\0', &mut record).ok()?;
    if bytes_read == 0 {
        return None;
    }

    // The final record of the input may lack a terminating NUL.
    if record.ends_with(&[b'\0']) {
        record.pop();
    }
    Some(String::from_utf8_lossy(&record).into_owned())
}
965
966// ---------------------------------------------------------------------------
967// Atomic file write (adapted from sd)
968// ---------------------------------------------------------------------------
969
970fn write_atomic(path: &Path, data: &[u8]) -> Result<()> {
971    let path = fs::canonicalize(path)?;
972    let parent = path
973        .parent()
974        .ok_or_else(|| Error::InvalidPath(path.to_path_buf()))?;
975
976    let temp = tempfile::NamedTempFile::new_in(parent)?;
977    let file = temp.as_file();
978
979    // Copy permissions from original
980    if let Ok(metadata) = fs::metadata(&path) {
981        file.set_permissions(metadata.permissions()).ok();
982
983        #[cfg(unix)]
984        {
985            use std::os::unix::fs::{MetadataExt, fchown};
986            let _ =
987                fchown(file, Some(metadata.uid()), Some(metadata.gid()));
988        }
989    }
990
991    let mut writer = io::BufWriter::new(file);
992    writer.write_all(data)?;
993    writer.flush()?;
994    drop(writer);
995
996    temp.persist(&path)?;
997    Ok(())
998}
999
1000// ---------------------------------------------------------------------------
1001// Compilation: parsed commands → compiled commands
1002// ---------------------------------------------------------------------------
1003
1004fn compile_commands(
1005    parsed: Vec<SedCommand>,
1006) -> Result<(Vec<CompiledSedCommand>, HashMap<String, usize>)> {
1007    let mut compiled = Vec::new();
1008    let mut labels = HashMap::new();
1009    flatten_and_compile(parsed, &mut compiled, &mut labels)?;
1010    Ok((compiled, labels))
1011}
1012
1013fn flatten_and_compile(
1014    commands: Vec<SedCommand>,
1015    compiled: &mut Vec<CompiledSedCommand>,
1016    labels: &mut HashMap<String, usize>,
1017) -> Result<()> {
1018    for cmd in commands {
1019        match cmd.command {
1020            Command::Block(block) => {
1021                let addr = compile_address_range(cmd.address)?;
1022                compiled.push(CompiledSedCommand {
1023                    address: addr,
1024                    command: CompiledCommand::ScopeStart,
1025                });
1026                flatten_and_compile(block, compiled, labels)?;
1027                compiled.push(CompiledSedCommand {
1028                    address: CompiledAddressRange::none(),
1029                    command: CompiledCommand::ScopeEnd,
1030                });
1031            }
1032            Command::Label(ref name) => {
1033                labels.insert(name.clone(), compiled.len());
1034                compiled.push(CompiledSedCommand {
1035                    address: CompiledAddressRange::none(),
1036                    command: CompiledCommand::Label(name.clone()),
1037                });
1038            }
1039            other => {
1040                let addr = compile_address_range(cmd.address)?;
1041                let cc = compile_single_command(other)?;
1042                compiled.push(CompiledSedCommand {
1043                    address: addr,
1044                    command: cc,
1045                });
1046            }
1047        }
1048    }
1049    Ok(())
1050}
1051
1052fn compile_address_range(
1053    range: AddressRange,
1054) -> Result<CompiledAddressRange> {
1055    match range {
1056        AddressRange::None => Ok(CompiledAddressRange::none()),
1057        AddressRange::Single { addr, negated } => {
1058            Ok(CompiledAddressRange {
1059                kind: CompiledAddressKind::Single(
1060                    compile_address(addr)?,
1061                ),
1062                negated,
1063            })
1064        }
1065        AddressRange::Range {
1066            start,
1067            end,
1068            negated,
1069        } => Ok(CompiledAddressRange {
1070            kind: CompiledAddressKind::Range(
1071                compile_address(start)?,
1072                compile_address(end)?,
1073            ),
1074            negated,
1075        }),
1076    }
1077}
1078
1079fn compile_address(addr: Address) -> Result<CompiledAddress> {
1080    match addr {
1081        Address::Line(n) => Ok(CompiledAddress::Line(n)),
1082        Address::Last => Ok(CompiledAddress::Last),
1083        Address::Regex(pattern) => {
1084            let re = regex::RegexBuilder::new(&pattern)
1085                .multi_line(true)
1086                .build()?;
1087            Ok(CompiledAddress::Regex(re))
1088        }
1089        Address::Step { first, step } => {
1090            Ok(CompiledAddress::Step { first, step })
1091        }
1092    }
1093}
1094
1095fn compile_single_command(cmd: Command) -> Result<CompiledCommand> {
1096    match cmd {
1097        Command::Substitute(sub) => {
1098            let re = regex::RegexBuilder::new(&sub.pattern)
1099                .case_insensitive(sub.case_insensitive)
1100                .multi_line(true)
1101                .build()?;
1102            Ok(CompiledCommand::Substitute(CompiledSubstitute {
1103                pattern: re,
1104                replacement: sub.replacement,
1105                global: sub.global,
1106                print: sub.print,
1107                nth: sub.nth,
1108                write_file: sub.write_file,
1109            }))
1110        }
1111        Command::Transliterate { from, to } => {
1112            Ok(CompiledCommand::Transliterate { from, to })
1113        }
1114        Command::Print => Ok(CompiledCommand::Print),
1115        Command::PrintFirstLine => Ok(CompiledCommand::PrintFirstLine),
1116        Command::PrintLineNumber => Ok(CompiledCommand::PrintLineNumber),
1117        Command::List => Ok(CompiledCommand::List),
1118        Command::Delete => Ok(CompiledCommand::Delete),
1119        Command::DeleteFirstLine => {
1120            Ok(CompiledCommand::DeleteFirstLine)
1121        }
1122        Command::Next => Ok(CompiledCommand::Next),
1123        Command::NextAppend => Ok(CompiledCommand::NextAppend),
1124        Command::HoldReplace => Ok(CompiledCommand::HoldReplace),
1125        Command::HoldAppend => Ok(CompiledCommand::HoldAppend),
1126        Command::GetReplace => Ok(CompiledCommand::GetReplace),
1127        Command::GetAppend => Ok(CompiledCommand::GetAppend),
1128        Command::Exchange => Ok(CompiledCommand::Exchange),
1129        Command::Append(t) => Ok(CompiledCommand::Append(t)),
1130        Command::Insert(t) => Ok(CompiledCommand::Insert(t)),
1131        Command::Change(t) => Ok(CompiledCommand::Change(t)),
1132        Command::Branch(l) => Ok(CompiledCommand::Branch(l)),
1133        Command::BranchIfSub(l) => Ok(CompiledCommand::BranchIfSub(l)),
1134        Command::BranchIfNotSub(l) => {
1135            Ok(CompiledCommand::BranchIfNotSub(l))
1136        }
1137        Command::ReadFile(f) => Ok(CompiledCommand::ReadFile(f)),
1138        Command::WriteFile(f) => Ok(CompiledCommand::WriteFile(f)),
1139        Command::WriteFirstLine(f) => {
1140            Ok(CompiledCommand::WriteFile(f))
1141        }
1142        Command::Quit(c) => Ok(CompiledCommand::Quit(c)),
1143        Command::QuitNoprint(c) => Ok(CompiledCommand::QuitNoprint(c)),
1144        Command::ClearPattern => Ok(CompiledCommand::ClearPattern),
1145        Command::Noop => Ok(CompiledCommand::Noop),
1146        Command::Label(l) => Ok(CompiledCommand::Label(l)),
1147        Command::Block(_) => {
1148            unreachable!("blocks should be flattened before this point")
1149        }
1150    }
1151}
1152
1153// ---------------------------------------------------------------------------
1154// Tests
1155// ---------------------------------------------------------------------------
1156
1157#[cfg(test)]
1158mod tests {
1159    use super::*;
1160    use crate::command;
1161
    /// Run `script` over `input` with `quiet` turned off and return
    /// everything the engine wrote to its output.
    fn run_sed(script: &str, input: &str) -> String {
        run_sed_opts(script, input, false)
    }
1165
    /// Parse `script`, run it over `input` through `Engine::process_stream`,
    /// and return the captured output as a `String`.
    ///
    /// `quiet` is passed through as `Options::quiet`; every other option is
    /// left off or empty. Panics if parsing, engine construction, stream
    /// processing, or UTF-8 decoding of the output fails.
    fn run_sed_opts(script: &str, input: &str, quiet: bool) -> String {
        let parsed = command::parse(script).unwrap();
        let options = Options {
            quiet,
            expressions: vec![],
            script_files: vec![],
            in_place: None,
            extended_regexp: false,
            separate: false,
            null_data: false,
            args: vec![],
        };
        let engine = Engine::new(parsed, &options).unwrap();
        // Feed the input from memory and capture output in a byte vector.
        let reader = io::Cursor::new(input.as_bytes());
        let mut output = Vec::new();
        engine
            .process_stream(io::BufReader::new(reader), &mut output)
            .unwrap();
        String::from_utf8(output).unwrap()
    }
1186
1187    #[test]
1188    fn substitute_basic() {
1189        assert_eq!(run_sed("s/foo/bar/", "foo\n"), "bar\n");
1190    }
1191
1192    #[test]
1193    fn substitute_global() {
1194        assert_eq!(
1195            run_sed("s/o/0/g", "foo boo\n"),
1196            "f00 b00\n"
1197        );
1198    }
1199
1200    #[test]
1201    fn substitute_first_only() {
1202        assert_eq!(run_sed("s/o/0/", "foo\n"), "f0o\n");
1203    }
1204
1205    #[test]
1206    fn substitute_nth() {
1207        assert_eq!(run_sed("s/o/0/2", "foo boo\n"), "fo0 boo\n");
1208    }
1209
1210    #[test]
1211    fn substitute_ampersand() {
1212        assert_eq!(
1213            run_sed("s/foo/[&]/", "foo\n"),
1214            "[foo]\n"
1215        );
1216    }
1217
1218    #[test]
1219    fn substitute_backreference() {
1220        assert_eq!(
1221            run_sed("s/(f)(o+)/\\2\\1/", "foo\n"),
1222            "oof\n"
1223        );
1224    }
1225
1226    #[test]
1227    fn delete_command() {
1228        assert_eq!(run_sed("2d", "a\nb\nc\n"), "a\nc\n");
1229    }
1230
1231    #[test]
1232    fn print_command_quiet() {
1233        assert_eq!(
1234            run_sed_opts("2p", "a\nb\nc\n", true),
1235            "b\n"
1236        );
1237    }
1238
1239    #[test]
1240    fn address_regex() {
1241        assert_eq!(
1242            run_sed("/^b/d", "apple\nbanana\ncherry\n"),
1243            "apple\ncherry\n"
1244        );
1245    }
1246
1247    #[test]
1248    fn address_range() {
1249        assert_eq!(
1250            run_sed("2,3d", "a\nb\nc\nd\n"),
1251            "a\nd\n"
1252        );
1253    }
1254
1255    #[test]
1256    fn address_negation() {
1257        assert_eq!(
1258            run_sed("2!d", "a\nb\nc\n"),
1259            "b\n"
1260        );
1261    }
1262
1263    #[test]
1264    fn transliterate() {
1265        assert_eq!(
1266            run_sed("y/abc/xyz/", "abc\n"),
1267            "xyz\n"
1268        );
1269    }
1270
1271    #[test]
1272    fn quit_command() {
1273        assert_eq!(run_sed("2q", "a\nb\nc\n"), "a\nb\n");
1274    }
1275
1276    #[test]
1277    fn hold_and_get() {
1278        // Store line 1 in hold, append it after line 2
1279        assert_eq!(
1280            run_sed("1h;2G", "first\nsecond\n"),
1281            "first\nsecond\nfirst\n"
1282        );
1283    }
1284
1285    #[test]
1286    fn exchange() {
1287        assert_eq!(
1288            run_sed("1{h;d};2{x}", "first\nsecond\n"),
1289            "first\n"
1290        );
1291    }
1292
1293    #[test]
1294    fn labels_and_branch() {
1295        // Simple loop: replace one 'a' at a time with 'b', branch back if substitution made
1296        assert_eq!(
1297            run_sed(":l\ns/a/b/\nt l", "aaa\n"),
1298            "bbb\n"
1299        );
1300    }
1301
1302    #[test]
1303    fn append_text() {
1304        assert_eq!(
1305            run_sed("1a after first", "first\nsecond\n"),
1306            "first\nafter first\nsecond\n"
1307        );
1308    }
1309
1310    #[test]
1311    fn insert_text() {
1312        assert_eq!(
1313            run_sed("2i before second", "first\nsecond\n"),
1314            "first\nbefore second\nsecond\n"
1315        );
1316    }
1317
1318    #[test]
1319    fn line_number() {
1320        assert_eq!(
1321            run_sed("=", "a\nb\n"),
1322            "1\na\n2\nb\n"
1323        );
1324    }
1325
1326    #[test]
1327    fn multiple_commands() {
1328        assert_eq!(
1329            run_sed("s/a/x/; s/b/y/", "ab\n"),
1330            "xy\n"
1331        );
1332    }
1333
1334    #[test]
1335    fn last_line_address() {
1336        assert_eq!(
1337            run_sed("$d", "a\nb\nc\n"),
1338            "a\nb\n"
1339        );
1340    }
1341
1342    #[test]
1343    fn clear_pattern() {
1344        assert_eq!(run_sed("z", "hello\n"), "\n");
1345    }
1346
1347    #[test]
1348    fn custom_delimiter() {
1349        assert_eq!(
1350            run_sed("s|foo|bar|", "foo\n"),
1351            "bar\n"
1352        );
1353    }
1354
1355    #[test]
1356    fn case_insensitive_sub() {
1357        assert_eq!(
1358            run_sed("s/foo/bar/i", "FOO\n"),
1359            "bar\n"
1360        );
1361    }
1362
1363    #[test]
1364    fn block_with_address() {
1365        assert_eq!(
1366            run_sed("/a/ { s/a/x/; s/$/!/ }", "a\nb\n"),
1367            "x!\nb\n"
1368        );
1369    }
1370
1371    #[test]
1372    fn regex_range_address() {
1373        assert_eq!(
1374            run_sed("/start/,/end/d", "a\nstart\nb\nend\nc\n"),
1375            "a\nc\n"
1376        );
1377    }
1378
1379    #[test]
1380    fn empty_input() {
1381        assert_eq!(run_sed("s/a/b/", ""), "");
1382    }
1383
1384    #[test]
1385    fn passthrough_no_match() {
1386        assert_eq!(
1387            run_sed("s/xyz/abc/", "hello\n"),
1388            "hello\n"
1389        );
1390    }
1391
1392    // ---------------------------------------------------------------
1393    // Comprehensive coverage tests
1394    // ---------------------------------------------------------------
1395
1396    // -- n command --
1397
1398    #[test]
1399    fn n_single_line() {
1400        // n on the only line: print it, exit (no double-print)
1401        assert_eq!(run_sed("n", "a\n"), "a\n");
1402    }
1403
1404    #[test]
1405    fn n_two_lines() {
1406        // n on line 1: print "a", read "b" into PS, auto-print "b"
1407        assert_eq!(run_sed("n", "a\nb\n"), "a\nb\n");
1408    }
1409
1410    #[test]
1411    fn n_with_command_after() {
1412        // n reads next line, then subsequent commands operate on it
1413        assert_eq!(
1414            run_sed("n;s/b/X/", "a\nb\nc\n"),
1415            "a\nX\nc\n"
1416        );
1417    }
1418
1419    #[test]
1420    fn n_quiet_mode() {
1421        // With -n, n does NOT print before reading next
1422        assert_eq!(
1423            run_sed_opts("n;p", "a\nb\nc\n", true),
1424            "b\n"
1425        );
1426    }
1427
1428    // -- N command --
1429
1430    #[test]
1431    fn big_n_appends() {
1432        // N appends next line to pattern space with \n
1433        assert_eq!(
1434            run_sed("N;s/\\n/ /", "a\nb\n"),
1435            "a b\n"
1436        );
1437    }
1438
1439    #[test]
1440    fn big_n_at_end() {
1441        // N at last line: print and exit
1442        assert_eq!(
1443            run_sed("N", "a\nb\nc\n"),
1444            "a\nb\nc\n"
1445        );
1446    }
1447
1448    // -- D command --
1449
1450    #[test]
1451    fn big_d_deletes_first_line_of_pattern() {
1452        // N;P;D is the classic "sliding window" idiom
1453        assert_eq!(
1454            run_sed("N;P;D", "a\nb\nc\n"),
1455            "a\nb\nc\n"
1456        );
1457    }
1458
1459    #[test]
1460    fn big_d_single_line() {
1461        // D on single-line pattern space acts like d (deletes every line)
1462        assert_eq!(run_sed("D", "a\nb\n"), "");
1463    }
1464
1465    // -- c command --
1466
1467    #[test]
1468    fn change_single_address() {
1469        assert_eq!(
1470            run_sed("2c REPLACED", "a\nb\nc\n"),
1471            "a\nREPLACED\nc\n"
1472        );
1473    }
1474
1475    #[test]
1476    fn change_regex_address() {
1477        assert_eq!(
1478            run_sed("/b/c GONE", "a\nb\nc\n"),
1479            "a\nGONE\nc\n"
1480        );
1481    }
1482
1483    // -- P command --
1484
1485    #[test]
1486    fn big_p_first_line() {
1487        // P prints up to first \n in pattern space
1488        assert_eq!(
1489            run_sed("N;P", "first\nsecond\n"),
1490            "first\nfirst\nsecond\n"
1491        );
1492    }
1493
1494    // -- i (insert) with address --
1495
1496    #[test]
1497    fn insert_before_last() {
1498        assert_eq!(
1499            run_sed("$i END", "a\nb\n"),
1500            "a\nEND\nb\n"
1501        );
1502    }
1503
1504    // -- a (append) with regex address --
1505
1506    #[test]
1507    fn append_after_match() {
1508        assert_eq!(
1509            run_sed("/b/a AFTER", "a\nb\nc\n"),
1510            "a\nb\nAFTER\nc\n"
1511        );
1512    }
1513
1514    // -- Hold space operations --
1515
1516    #[test]
1517    fn hold_append_and_get() {
1518        // H appends to hold; G appends hold to pattern
1519        assert_eq!(
1520            run_sed("1H;2G", "first\nsecond\n"),
1521            "first\nsecond\n\nfirst\n"
1522        );
1523    }
1524
1525    #[test]
1526    fn hold_get_replace() {
1527        // h copies to hold; g copies from hold to pattern
1528        assert_eq!(
1529            run_sed("1h;2g", "first\nsecond\n"),
1530            "first\nfirst\n"
1531        );
1532    }
1533
1534    #[test]
1535    fn reverse_lines() {
1536        // Classic sed reverse: 1!G;h;$!d
1537        assert_eq!(
1538            run_sed("1!G;h;$!d", "a\nb\nc\n"),
1539            "c\nb\na\n"
1540        );
1541    }
1542
1543    // -- x (exchange) --
1544
1545    #[test]
1546    fn exchange_basic() {
1547        // Hold starts empty, so x on line 1 gives empty, hold gets "a"
1548        assert_eq!(
1549            run_sed("x", "a\nb\n"),
1550            "\na\n"
1551        );
1552    }
1553
1554    // -- Address ranges --
1555
1556    #[test]
1557    fn range_regex_to_regex() {
1558        assert_eq!(
1559            run_sed("/start/,/end/d", "a\nstart\nb\nend\nc\n"),
1560            "a\nc\n"
1561        );
1562    }
1563
1564    #[test]
1565    fn range_line_to_last() {
1566        assert_eq!(
1567            run_sed("2,$d", "a\nb\nc\nd\n"),
1568            "a\n"
1569        );
1570    }
1571
1572    #[test]
1573    fn range_negated() {
1574        // Delete everything NOT in range 2,3
1575        assert_eq!(
1576            run_sed("2,3!d", "a\nb\nc\nd\n"),
1577            "b\nc\n"
1578        );
1579    }
1580
1581    #[test]
1582    fn range_start_equals_end_on_same_line() {
1583        // 2,2d only deletes line 2
1584        assert_eq!(
1585            run_sed("2,2d", "a\nb\nc\n"),
1586            "a\nc\n"
1587        );
1588    }
1589
1590    #[test]
1591    fn range_regex_no_check_on_start_line() {
1592        // When start and end regexes could match same line,
1593        // end is NOT checked on start line (POSIX/GNU behavior)
1594        assert_eq!(
1595            run_sed_opts(
1596                "/\\[start\\]/,/\\[/p",
1597                "[start]\nfoo\nbar\n[end]\n",
1598                true,
1599            ),
1600            "[start]\nfoo\nbar\n[end]\n"
1601        );
1602    }
1603
1604    // -- Step address --
1605
1606    #[test]
1607    fn step_even_lines() {
1608        assert_eq!(
1609            run_sed("0~2d", "a\nb\nc\nd\ne\n"),
1610            "a\nc\ne\n"
1611        );
1612    }
1613
1614    #[test]
1615    fn step_odd_lines() {
1616        assert_eq!(
1617            run_sed("1~2d", "a\nb\nc\nd\ne\n"),
1618            "b\nd\n"
1619        );
1620    }
1621
1622    // -- Substitute flags --
1623
1624    #[test]
1625    fn sub_nth_3() {
1626        assert_eq!(run_sed("s/a/X/3", "aaaaa\n"), "aaXaa\n");
1627    }
1628
1629    #[test]
1630    fn sub_global_and_print() {
1631        assert_eq!(
1632            run_sed_opts("s/a/X/gp", "aaa\nbbb\n", true),
1633            "XXX\n"
1634        );
1635    }
1636
    #[test]
    fn sub_escaped_delimiter() {
        // The `/` delimiter itself occurs in the pattern, escaped as `\/`
        assert_eq!(
            run_sed("s/a\\/b/X/", "a/b\n"),
            "X\n"
        );
    }
1645
1646    #[test]
1647    fn sub_newline_in_replacement() {
1648        assert_eq!(
1649            run_sed("s/a/X\\nY/", "a\n"),
1650            "X\nY\n"
1651        );
1652    }
1653
1654    #[test]
1655    fn sub_tab_in_replacement() {
1656        assert_eq!(
1657            run_sed("s/a/X\\tY/", "a\n"),
1658            "X\tY\n"
1659        );
1660    }
1661
1662    #[test]
1663    fn sub_literal_ampersand() {
1664        // \& should be literal &
1665        assert_eq!(
1666            run_sed("s/foo/\\&/", "foo\n"),
1667            "&\n"
1668        );
1669    }
1670
1671    #[test]
1672    fn sub_literal_backslash() {
1673        // \\\\ in replacement → literal backslash
1674        assert_eq!(
1675            run_sed("s/a/\\\\/", "a\n"),
1676            "\\\n"
1677        );
1678    }
1679
1680    // -- Branching --
1681
1682    #[test]
1683    fn branch_unconditional() {
1684        // b skip jumps forward past the d command, preserving output
1685        assert_eq!(
1686            run_sed("b skip;d;:skip", "hello\n"),
1687            "hello\n"
1688        );
1689    }
1690
1691    #[test]
1692    fn branch_unconditional_no_label() {
1693        // b (no label) branches to end of script
1694        assert_eq!(
1695            run_sed("b\nd", "hello\n"),
1696            "hello\n"
1697        );
1698    }
1699
1700    #[test]
1701    fn branch_if_sub_no_match() {
1702        // t should NOT branch if no sub was made
1703        assert_eq!(
1704            run_sed("s/x/y/;t end;s/a/X/;:end", "abc\n"),
1705            "Xbc\n"
1706        );
1707    }
1708
1709    #[test]
1710    fn branch_if_not_sub() {
1711        // T branches if last sub was NOT successful
1712        assert_eq!(
1713            run_sed("s/x/y/;T skip;s/a/SHOULD_NOT/;:skip", "abc\n"),
1714            "abc\n"
1715        );
1716    }
1717
1718    // -- l command (list) --
1719
1720    #[test]
1721    fn list_command() {
1722        assert_eq!(
1723            run_sed("l", "a\tb\n"),
1724            "a\\tb$\na\tb\n"
1725        );
1726    }
1727
1728    // -- = command (line number) with address --
1729
1730    #[test]
1731    fn line_number_with_address() {
1732        assert_eq!(
1733            run_sed("2=", "a\nb\nc\n"),
1734            "a\n2\nb\nc\n"
1735        );
1736    }
1737
1738    // -- z (clear pattern) --
1739
1740    #[test]
1741    fn clear_pattern_in_block() {
1742        assert_eq!(
1743            run_sed("/hello/{z;s/$/EMPTY/}", "hello\nworld\n"),
1744            "EMPTY\nworld\n"
1745        );
1746    }
1747
1748    // -- Multiple commands, complex scripts --
1749
1750    #[test]
1751    fn strip_html_tags() {
1752        assert_eq!(
1753            run_sed("s/<[^>]*>//g", "<b>bold</b>\n"),
1754            "bold\n"
1755        );
1756    }
1757
1758    #[test]
1759    fn sed_multiline_join() {
1760        // Join all lines with space using N and s
1761        assert_eq!(
1762            run_sed(":a;N;s/\\n/ /;$!b a", "one\ntwo\nthree\n"),
1763            "one two three\n"
1764        );
1765    }
1766
1767    #[test]
1768    fn double_space() {
1769        // Classic double-spacing: G appends empty hold to pattern
1770        assert_eq!(
1771            run_sed("G", "a\nb\n"),
1772            "a\n\nb\n\n"
1773        );
1774    }
1775
1776    #[test]
1777    fn delete_empty_lines() {
1778        assert_eq!(
1779            run_sed("/^$/d", "a\n\nb\n\nc\n"),
1780            "a\nb\nc\n"
1781        );
1782    }
1783
1784    #[test]
1785    fn multiple_expressions() {
1786        // Simulates -e 'cmd1' -e 'cmd2' by joining with newline
1787        assert_eq!(
1788            run_sed("s/a/x/\ns/b/y/", "ab\n"),
1789            "xy\n"
1790        );
1791    }
1792
1793    #[test]
1794    fn nested_blocks() {
1795        assert_eq!(
1796            run_sed("1{/a/{s/a/X/}}", "abc\ndef\n"),
1797            "Xbc\ndef\n"
1798        );
1799    }
1800
1801    // -- Edge cases --
1802
1803    #[test]
1804    fn line_with_no_trailing_newline() {
1805        // Input without trailing newline
1806        assert_eq!(run_sed("s/a/X/", "abc"), "Xbc\n");
1807    }
1808
1809    #[test]
1810    fn single_empty_line() {
1811        assert_eq!(run_sed("s/^$/EMPTY/", "\n"), "EMPTY\n");
1812    }
1813
1814    #[test]
1815    fn substitute_with_empty_replacement() {
1816        assert_eq!(run_sed("s/foo//", "foobar\n"), "bar\n");
1817    }
1818
1819    #[test]
1820    fn substitute_with_empty_pattern_match() {
1821        // Regex that matches empty string at beginning
1822        assert_eq!(run_sed("s/^/PREFIX: /", "hello\n"), "PREFIX: hello\n");
1823    }
1824
1825    #[test]
1826    fn multiple_ranges_interleaved() {
1827        // Two separate range commands
1828        let input = "a\nb\nc\nd\ne\nf\n";
1829        assert_eq!(
1830            run_sed("2,3s/./X/;5,6s/./Y/", input),
1831            "a\nX\nX\nd\nY\nY\n"
1832        );
1833    }
1834
1835    #[test]
1836    fn regex_special_chars() {
1837        assert_eq!(
1838            run_sed("s/\\./X/g", "a.b.c\n"),
1839            "aXbXc\n"
1840        );
1841    }
1842
1843    #[test]
1844    fn regex_anchors() {
1845        assert_eq!(
1846            run_sed("s/^/> /", "hello\n"),
1847            "> hello\n"
1848        );
1849        assert_eq!(
1850            run_sed("s/$/ </", "hello\n"),
1851            "hello <\n"
1852        );
1853    }
1854
1855    // -- q and Q --
1856
1857    #[test]
1858    fn quit_prints_current_line() {
1859        // q should print the current line before quitting
1860        assert_eq!(run_sed("2q", "a\nb\nc\n"), "a\nb\n");
1861    }
1862
1863    #[test]
1864    fn quit_no_print() {
1865        // Q quits without printing pattern space
1866        assert_eq!(run_sed("2Q", "a\nb\nc\n"), "a\n");
1867    }
1868
1869    // -- Complex real-world scripts --
1870
1871    #[test]
1872    fn remove_trailing_whitespace() {
1873        assert_eq!(
1874            run_sed("s/[ \t]*$//", "hello   \nworld\t\t\n"),
1875            "hello\nworld\n"
1876        );
1877    }
1878
1879    #[test]
1880    fn number_lines() {
1881        // Print line number then line (like nl)
1882        assert_eq!(
1883            run_sed("=;s/^/  /", "a\nb\n"),
1884            "1\n  a\n2\n  b\n"
1885        );
1886    }
1887
1888    #[test]
1889    fn print_only_matches() {
1890        // -n with /pattern/p
1891        assert_eq!(
1892            run_sed_opts("/foo/p", "foo\nbar\nfoo2\n", true),
1893            "foo\nfoo2\n"
1894        );
1895    }
1896
1897    #[test]
1898    fn delete_first_and_last() {
1899        assert_eq!(
1900            run_sed("1d;$d", "a\nb\nc\n"),
1901            "b\n"
1902        );
1903    }
1904
1905    #[test]
1906    fn change_every_matching_line() {
1907        assert_eq!(
1908            run_sed("/old/c new", "old\nkeep\nold\n"),
1909            "new\nkeep\nnew\n"
1910        );
1911    }
1912}