use rpm_spec::ast::{Text, TextSegment};
/// One shell word produced by [`tokenize_line`].
///
/// A word is kept as an ordered list of parts so that macro references
/// embedded in the word stay distinguishable from plain text.
#[derive(Debug, Clone)]
pub struct ShellToken {
/// Pieces of the word in source order: literal text and macro references.
pub parts: Vec<ShellArg>,
}
/// A single piece of a [`ShellToken`].
///
/// `PartialEq`/`Eq` are derived so parts can be compared directly, e.g. in
/// `assert_eq!` against an expected sequence of parts.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ShellArg {
    /// Plain text, stored after quote and backslash processing.
    Literal(String),
    /// The name of a macro reference that appeared inside the word.
    Macro(String),
}
impl ShellToken {
    /// Returns the word's text if every part is literal.
    ///
    /// The literal parts are concatenated in order. As soon as a macro part
    /// is encountered the result is `None`, because the final text cannot be
    /// known without expanding the macro. A token with no parts yields
    /// `Some("")`.
    pub fn literal_str(&self) -> Option<String> {
        self.parts
            .iter()
            .map(|part| match part {
                ShellArg::Literal(text) => Some(text.as_str()),
                ShellArg::Macro(_) => None,
            })
            .collect()
    }

    /// Renders the word back to text, writing each macro part as `%{name}`.
    ///
    /// Literal parts are emitted as stored (quotes and escapes are not
    /// re-added).
    pub fn render_verbatim(&self) -> String {
        self.parts
            .iter()
            .map(|part| match part {
                ShellArg::Literal(text) => text.clone(),
                ShellArg::Macro(name) => format!("%{{{name}}}"),
            })
            .collect()
    }
}
/// Splits one line of spec text into shell words.
///
/// Literal segments are scanned character by character with shell-like
/// quoting rules; macro segments are attached to the word being built as
/// [`ShellArg::Macro`] parts. Any other segment kind is ignored.
///
/// Returns the words in source order. A word still open at the end of the
/// line (including one inside an unterminated quote) is emitted as-is.
///
/// NOTE(review): a `#` comment only stops the literal segment it appears in;
/// segments that follow it on the same line are still tokenized.
pub fn tokenize_line(line: &Text) -> Vec<ShellToken> {
    let mut tokens = Vec::new();
    let mut current = ShellToken { parts: Vec::new() };
    let mut current_literal = String::new();
    let mut state = State::Outside;
    for seg in &line.segments {
        match seg {
            TextSegment::Literal(s) => {
                tokenize_literal_chunk(
                    s,
                    &mut state,
                    &mut current,
                    &mut current_literal,
                    &mut tokens,
                );
            }
            TextSegment::Macro(m) => {
                // Keep literal text that precedes the macro ahead of it in the word.
                flush_literal(&mut current_literal, &mut current);
                current.parts.push(ShellArg::Macro(m.name.clone()));
                // A macro is word content. If it begins a word, mark the word
                // as open so that following whitespace terminates it (rather
                // than being skipped as inter-word space) and a following `#`
                // is not mistaken for the start of a comment.
                if state == State::Outside {
                    state = State::Unquoted;
                }
            }
            _ => {}
        }
    }
    // Emit whatever word is still being built when the line ends.
    flush_literal(&mut current_literal, &mut current);
    if !current.parts.is_empty() {
        tokens.push(current);
    }
    tokens
}
/// Quoting context of the tokenizer.
///
/// The state is owned by `tokenize_line` and passed by mutable reference to
/// `tokenize_literal_chunk`, so it carries over from one literal segment to
/// the next.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
/// Between words: whitespace is skipped and `#` ends processing of the chunk.
Outside,
/// Inside a word, not within quotes: whitespace ends the word.
Unquoted,
/// Inside `'...'`: every character up to the closing `'` is taken literally.
Single,
/// Inside `"..."`: characters are literal except `"` and backslash escapes.
Double,
}
/// Feeds one literal text segment through the shell-word state machine.
///
/// * `s` - the literal text to scan.
/// * `state` - quoting context, carried over from (and on to) other segments.
/// * `current` - the word being assembled; completed words are moved out of it.
/// * `literal` - pending literal text of the current word, not yet flushed.
/// * `tokens` - receives each word completed within this chunk.
///
/// Rules applied:
/// * Between words, ASCII whitespace is skipped and `#` stops processing of
///   this chunk (comment).
/// * In an unquoted word, ASCII whitespace ends the word, `'` / `"` open a
///   quoted section, and `\x` yields `x`.
/// * Inside single quotes everything up to the closing `'` is literal.
/// * Inside double quotes `\x` yields `x`; `"` closes the section.
/// * A backslash that is the last character of the chunk is kept literally.
///
/// The scan works on `char`s, not bytes, so multi-byte UTF-8 characters are
/// preserved intact. A backslash that *starts* a word is handled like any
/// other unquoted backslash (so `\#` begins a word containing `#`).
///
/// NOTE(review): returning at `#` only ends this chunk; the caller decides
/// what happens with any segments that follow.
fn tokenize_literal_chunk(
    s: &str,
    state: &mut State,
    current: &mut ShellToken,
    literal: &mut String,
    tokens: &mut Vec<ShellToken>,
) {
    let mut chars = s.chars().peekable();
    while let Some(&c) = chars.peek() {
        match *state {
            State::Outside => {
                if c.is_ascii_whitespace() {
                    chars.next();
                } else if c == '#' {
                    return;
                } else {
                    // Start of a word: switch state without consuming so the
                    // character is interpreted by the unquoted-word rules
                    // (quotes, backslash escapes) on the next iteration.
                    *state = State::Unquoted;
                }
            }
            State::Unquoted => {
                chars.next();
                match c {
                    _ if c.is_ascii_whitespace() => {
                        finish_token(literal, current, tokens);
                        *state = State::Outside;
                    }
                    '\'' => *state = State::Single,
                    '"' => *state = State::Double,
                    // Escape: take the next character verbatim; a trailing
                    // backslash with nothing after it stays a backslash.
                    '\\' => literal.push(chars.next().unwrap_or('\\')),
                    _ => literal.push(c),
                }
            }
            State::Single => {
                chars.next();
                if c == '\'' {
                    // Closing quote: the word continues unquoted.
                    *state = State::Unquoted;
                } else {
                    literal.push(c);
                }
            }
            State::Double => {
                chars.next();
                match c {
                    '"' => *state = State::Unquoted,
                    '\\' => literal.push(chars.next().unwrap_or('\\')),
                    _ => literal.push(c),
                }
            }
        }
    }
}
/// Closes the word under construction and appends it to `tokens`.
///
/// Pending literal text is flushed into `current` first. If the word then has
/// at least one part it is moved into `tokens` and `current` is left empty,
/// ready for the next word; otherwise nothing is emitted.
fn finish_token(literal: &mut String, current: &mut ShellToken, tokens: &mut Vec<ShellToken>) {
    flush_literal(literal, current);
    if current.parts.is_empty() {
        return;
    }
    let parts = std::mem::take(&mut current.parts);
    tokens.push(ShellToken { parts });
}
/// Moves pending literal text into `current` as a [`ShellArg::Literal`] part.
///
/// Does nothing when `literal` is empty, so no empty parts are ever created.
/// On return `literal` is always empty.
fn flush_literal(literal: &mut String, current: &mut ShellToken) {
    if literal.is_empty() {
        return;
    }
    let text = std::mem::take(literal);
    current.parts.push(ShellArg::Literal(text));
}
/// Returns the first purely-literal argument that does not start with `-`.
///
/// The first token is skipped (it is treated as the command name). Tokens
/// containing a macro are skipped as well, since they have no literal text.
/// Returns `None` when no such argument exists.
pub(crate) fn first_non_flag_arg(tokens: &[ShellToken]) -> Option<String> {
    for token in tokens.iter().skip(1) {
        match token.literal_str() {
            Some(text) if !text.starts_with('-') => return Some(text),
            _ => {}
        }
    }
    None
}
/// Cuts `s` at the first `#` that starts a comment.
///
/// A `#` counts as a comment start when it is the first character of `s` or
/// is directly preceded by ASCII whitespace. The returned slice ends just
/// before that `#` (whitespace in front of it is kept). Quoting is not taken
/// into account. If no such `#` exists, `s` is returned unchanged.
pub(crate) fn strip_trailing_comment(s: &str) -> &str {
    let bytes = s.as_bytes();
    let comment_start = s
        .match_indices('#')
        .map(|(at, _)| at)
        .find(|&at| at == 0 || bytes[at - 1].is_ascii_whitespace());
    match comment_start {
        Some(at) => &s[..at],
        None => s,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use rpm_spec::ast::{ConditionalMacro, MacroKind, MacroRef, Text, TextSegment};

    /// Builds a `Text` from plain source text.
    fn t(src: &str) -> Text {
        Text::from(src)
    }

    /// Shorthand for a literal token part.
    fn lit(src: &str) -> ShellArg {
        ShellArg::Literal(src.to_owned())
    }

    /// Shorthand for a macro token part.
    fn mac(name: &str) -> ShellArg {
        ShellArg::Macro(name.to_owned())
    }

    /// Builds `<literal_prefix>%{<macro_name>}<literal_suffix>` as a `Text`.
    fn macro_text(literal_prefix: &str, macro_name: &str, literal_suffix: &str) -> Text {
        Text {
            segments: vec![
                TextSegment::Literal(literal_prefix.into()),
                TextSegment::macro_ref(MacroRef {
                    kind: MacroKind::Braced,
                    name: macro_name.into(),
                    args: Vec::new(),
                    conditional: ConditionalMacro::None,
                    with_value: None,
                }),
                TextSegment::Literal(literal_suffix.into()),
            ],
        }
    }

    #[test]
    fn splits_on_whitespace() {
        let toks = tokenize_line(&t("rm -rf /tmp/foo"));
        assert_eq!(toks.len(), 3);
        assert_eq!(toks[0].literal_str().as_deref(), Some("rm"));
        assert_eq!(toks[1].literal_str().as_deref(), Some("-rf"));
        assert_eq!(toks[2].literal_str().as_deref(), Some("/tmp/foo"));
    }

    #[test]
    fn empty_line_yields_no_tokens() {
        assert!(tokenize_line(&t("")).is_empty());
        assert!(tokenize_line(&t(" ")).is_empty());
    }

    #[test]
    fn comment_terminates_line() {
        let toks = tokenize_line(&t("echo hi # ignored"));
        assert_eq!(toks.len(), 2);
        assert_eq!(toks[0].literal_str().as_deref(), Some("echo"));
        assert_eq!(toks[1].literal_str().as_deref(), Some("hi"));
    }

    #[test]
    fn single_quotes_preserve_whitespace() {
        let toks = tokenize_line(&t("echo 'hello world'"));
        assert_eq!(toks.len(), 2);
        assert_eq!(toks[1].literal_str().as_deref(), Some("hello world"));
    }

    #[test]
    fn double_quotes_preserve_whitespace() {
        let toks = tokenize_line(&t("install -m 0644 \"a b.txt\" /etc/"));
        assert_eq!(toks.len(), 5);
        assert_eq!(toks[3].literal_str().as_deref(), Some("a b.txt"));
    }

    #[test]
    fn backslash_escapes_space() {
        let toks = tokenize_line(&t("touch foo\\ bar"));
        assert_eq!(toks.len(), 2);
        assert_eq!(toks[1].literal_str().as_deref(), Some("foo bar"));
    }

    #[test]
    fn macro_keeps_part_in_token() {
        let line = macro_text("cp ", "buildroot", "/etc/foo /etc/foo");
        let toks = tokenize_line(&line);
        assert_eq!(toks.len(), 3);
        assert_eq!(toks[0].literal_str().as_deref(), Some("cp"));
        assert!(toks[1].literal_str().is_none());
        assert_eq!(toks[1].render_verbatim(), "%{buildroot}/etc/foo");
        assert_eq!(toks[2].literal_str().as_deref(), Some("/etc/foo"));
    }

    #[test]
    fn unclosed_quote_consumes_rest_of_line() {
        let toks = tokenize_line(&t("echo 'no closing quote"));
        assert_eq!(toks.len(), 2);
        assert_eq!(toks[1].literal_str().as_deref(), Some("no closing quote"));
    }

    #[test]
    fn render_verbatim_preserves_macro_in_word() {
        let line = macro_text("foo-", "version", "-bar");
        let toks = tokenize_line(&line);
        assert_eq!(toks.len(), 1);
        assert_eq!(toks[0].render_verbatim(), "foo-%{version}-bar");
        assert!(toks[0].literal_str().is_none());
    }

    #[test]
    fn literal_str_joins_literal_parts() {
        let tok = ShellToken {
            parts: vec![lit("foo"), lit("bar")],
        };
        assert_eq!(tok.literal_str().as_deref(), Some("foobar"));
        assert_eq!(tok.render_verbatim(), "foobar");
    }

    #[test]
    fn macro_part_blocks_literal_str() {
        let tok = ShellToken {
            parts: vec![lit("lib"), mac("name"), lit(".so")],
        };
        assert!(tok.literal_str().is_none());
        assert_eq!(tok.render_verbatim(), "lib%{name}.so");
    }
}