#![deny(missing_docs)]
use std::fmt::{self as fmt, Display, Formatter};
use crate::diagnostics::TranslationResult;
use super::*;
use log::warn;
use proc_macro2::{TokenStream, TokenTree};
use syn::__private::ToTokens;
/// Direction keyword of a single `asm!` operand.
///
/// The `Display` impl renders each variant as the keyword that is emitted in
/// front of the operand's register specification in the generated macro.
enum ArgDirSpec {
/// Rendered as `in`.
In,
/// Rendered as `out`.
Out,
/// Rendered as `inout`.
InOut,
/// Rendered as `lateout`.
LateOut,
/// Rendered as `inlateout`.
InLateOut,
}
impl Display for ArgDirSpec {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
use ArgDirSpec::*;
write!(
f,
"{}",
match self {
In => "in",
Out => "out",
InOut => "inout",
LateOut => "lateout",
InLateOut => "inlateout",
}
)
}
}
impl ArgDirSpec {
fn with_in(&self) -> Self {
use ArgDirSpec::*;
match self {
In => In,
Out => InOut,
InOut => InOut,
LateOut => InLateOut,
InLateOut => InLateOut,
}
}
}
/// Target architectures distinguished by the inline-assembly translation.
///
/// The value selects which constraint, modifier, and reserved-register
/// tables in this file apply.
#[derive(Copy, Clone, PartialEq)]
enum Arch {
/// 32-bit x86 (`i386`/`i486`/`i586`/`i686`/`x86` target prefixes).
X86,
/// 64-bit x86 (`x86_64` target prefix).
X86_64,
/// 32-bit ARM (`arm`/`thumbv` target prefixes).
Arm,
/// 64-bit ARM (`aarch64`/`armv8`/`arm64` target prefixes).
Aarch64,
/// RISC-V (`riscv` target prefix).
Riscv,
}
/// Determines the architecture from the leading component of a target tuple.
///
/// Returns `None` when the tuple does not start with any known prefix.
fn parse_arch(target_tuple: &str) -> Option<Arch> {
    // Checked in order: more specific prefixes (`x86_64`, `armv8`, `arm64`)
    // must come before the shorter ones they would otherwise be shadowed by
    // (`x86`, `arm`).
    const PREFIXES: &[(&str, Arch)] = &[
        ("x86_64", Arch::X86_64),
        ("i386", Arch::X86),
        ("i486", Arch::X86),
        ("i586", Arch::X86),
        ("i686", Arch::X86),
        ("x86", Arch::X86),
        ("aarch64", Arch::Aarch64),
        ("armv8", Arch::Aarch64),
        ("arm64", Arch::Aarch64),
        ("arm", Arch::Arm),
        ("thumbv", Arch::Arm),
        ("riscv", Arch::Riscv),
    ];
    PREFIXES
        .iter()
        .find(|(prefix, _)| target_tuple.starts_with(*prefix))
        .map(|&(_, arch)| arch)
}
/// Parses one operand constraint string into the pieces needed to emit an
/// `asm!` operand.
///
/// Returns `(direction, mem_only, constraint)` where `constraint` is either a
/// register class name, a `"`-quoted explicit register, a tied-operand index
/// made of digits, or (with a warning) the unrecognized remainder of the
/// input.
///
/// # Errors
///
/// Returns a generic translation error when the string contains a `,`
/// (several alternatives) but did not start with `=`.
fn parse_constraints(
    mut constraints: &str,
    arch: Arch,
) -> TranslationResult<(ArgDirSpec, bool, String)> {
    let parse_error = |constraints| {
        Err(TranslationError::new(
            None,
            failure::err_msg(
                "Inline assembly constraints could not be parsed: ".to_owned() + constraints,
            )
            .context(TranslationErrorKind::Generic),
        ))
    };
    use ArgDirSpec::*;

    // Leading `+` marks a read-write operand, `=` a write-only one; anything
    // else is treated as an input.
    let mut is_input = if let Some(rest) = constraints.strip_prefix('+') {
        constraints = rest;
        true
    } else if let Some(rest) = constraints.strip_prefix('=') {
        constraints = rest;
        false
    } else {
        true
    };

    // `&` marks an early-clobber operand.
    let early_clobber = match constraints.strip_prefix('&') {
        Some(rest) => {
            constraints = rest;
            true
        }
        None => false,
    };

    // `*` marks an indirect (memory) operand.
    let mut mem_only = match constraints.strip_prefix('*') {
        Some(rest) => {
            constraints = rest;
            true
        }
        None => false,
    };

    // Only the first comma-separated alternative is translated. A second
    // alternative is accepted solely on an `=` operand, which is then treated
    // as an input as well.
    if let Some((first_alternative, _)) = constraints.split_once(',') {
        constraints = first_alternative;
        if is_input {
            return parse_error(constraints);
        }
        is_input = true;
    }

    // Explicit registers are written `{reg}`; from here on they are `"reg"`.
    let constraints = constraints.replace(['{', '}'], "\"");
    let mut llvm_constraints = constraints.clone();
    let mut remaining = constraints.as_str();

    // Walk the constraint one character at a time. Stepping by
    // `len_utf8()` keeps the slice on a character boundary, so unexpected
    // non-ASCII input ends up in the "unrecognized" warning instead of
    // panicking.
    while let Some(c) = remaining.chars().next() {
        match c {
            'm' => {
                mem_only = true;
                llvm_constraints = "reg".into();
            }
            'r' | 'i' => {
                llvm_constraints = "reg".into();
            }
            _ => {
                let is_explicit_reg = c == '"';
                let is_tied = remaining.chars().all(|c| c.is_ascii_digit());
                if !(is_explicit_reg || is_tied) {
                    if let Some((machine_constraints, is_mem)) =
                        translate_machine_constraint(remaining, arch)
                    {
                        llvm_constraints = machine_constraints.into();
                        mem_only = is_mem;
                    } else {
                        warn!(
                            "Did not recognize inline asm constraint: {}\n\
                             It is likely that this will cause compilation errors or \
                             incorrect semantics in the translated program; please \
                             manually correct.",
                            remaining
                        );
                        llvm_constraints = remaining.into();
                    }
                }
                // Explicit registers, tied indices and machine constraints
                // consume the rest of the string.
                break;
            }
        }
        remaining = &remaining[c.len_utf8()..];
    }

    // Memory operands are always passed as inputs; otherwise the direction
    // follows from the `+`/`=` and `&` markers.
    let mode = if mem_only {
        In
    } else {
        match (is_input, early_clobber) {
            (false, false) => LateOut,
            (false, true) => Out,
            (true, false) => InLateOut,
            (true, true) => InOut,
        }
    };
    Ok((mode, mem_only, llvm_constraints))
}
/// Reports whether a parsed constraint names an explicit register (contains
/// a `"`) or starts with an ASCII digit (a tied-operand index).
fn is_regname_or_int(parsed_constraint: &str) -> bool {
    let has_quote = parsed_constraint.chars().any(|c| c == '"');
    let leading_digit = parsed_constraint
        .chars()
        .next()
        .map_or(false, |c| c.is_ascii_digit());
    has_quote || leading_digit
}
/// Maps an architecture-specific constraint to an `asm!` operand
/// specification.
///
/// On success returns `(spec, is_mem)`, where `spec` is a register class name
/// or a `"`-quoted explicit register and `is_mem` tells whether the operand
/// is a memory operand. Returns `None` for constraints without a known
/// translation.
fn translate_machine_constraint(constraint: &str, arch: Arch) -> Option<(&str, bool)> {
    // Plain register operand.
    let reg = |spec: &'static str| Some((spec, false));
    // Memory operand; its address is passed in a general-purpose register.
    let mem = || Some(("reg", true));

    match arch {
        Arch::X86 | Arch::X86_64 => match constraint {
            "Q" => reg("reg_abcd"),
            "q" => reg("reg_byte"),
            "a" => reg("\"a\""),
            "b" => reg("\"b\""),
            "c" => reg("\"c\""),
            "d" => reg("\"d\""),
            "S" => reg("\"si\""),
            "D" => reg("\"di\""),
            "U" => {
                warn!(
                    "the x86 'U' inline assembly operand constraint cannot \
                     be translated correctly. It corresponds to the `clobber_abi` \
                     option for `asm!`, but c2rust does not know the ABI being \
                     used, so it cannot be translated automatically. Please correct \
                     manually after translation."
                );
                None
            }
            "f" => reg("x87_reg"),
            "t" => reg("\"st(0)\""),
            "u" => reg("\"st(1)\""),
            "x" => reg("xmm_reg"),
            "y" => reg("mmx_reg"),
            "v" => reg("zmm_reg"),
            "Yz" => reg("\"xmm0\""),
            "Yk" => reg("kreg"),
            _ => None,
        },
        Arch::Aarch64 => match constraint {
            "k" => reg("\"SP\""),
            "w" => reg("vreg"),
            "x" => reg("vreg_low16"),
            "Upa" => reg("preg"),
            "Q" | "Ump" => mem(),
            _ => None,
        },
        Arch::Arm => match constraint {
            "k" => reg("\"SP\""),
            "l" => reg("reg"),
            "t" => reg("sreg"),
            "x" => reg("sreg_low16"),
            "w" => reg("dreg"),
            "Q" | "Uv" | "Uy" | "Uq" => mem(),
            _ => None,
        },
        Arch::Riscv => match constraint {
            "f" => reg("freg"),
            _ => None,
        },
    }
}
/// Translates a single-character operand modifier from the source asm
/// template to the modifier used in the generated `asm!` template.
///
/// Returns `None` when the modifier has no translation for `arch`; callers
/// then omit it.
fn translate_modifier(modifier: char, arch: Arch) -> Option<char> {
    match arch {
        Arch::X86 | Arch::X86_64 => {
            // (source modifier, `asm!` modifier)
            const X86_MODIFIERS: [(char, char); 5] =
                [('k', 'e'), ('q', 'r'), ('b', 'l'), ('h', 'h'), ('w', 'x')];
            X86_MODIFIERS
                .iter()
                .find(|&&(from, _)| from == modifier)
                .map(|&(_, to)| to)
        }
        // `p` and `q` are dropped on 32-bit ARM.
        Arch::Arm if matches!(modifier, 'p' | 'q') => None,
        // Everything else is passed through unchanged.
        Arch::Arm | Arch::Aarch64 | Arch::Riscv => Some(modifier),
    }
}
/// One operand of the generated `asm!` invocation, possibly combining a C
/// output operand with the C input operand paired with it.
struct BidirAsmOperand {
/// Direction keyword emitted for this operand.
dir_spec: ArgDirSpec,
/// Whether the operand is a memory operand; such operands are passed as a
/// borrow of the expression.
mem_only: bool,
/// Parsed constraint: a register class name, a `"`-quoted explicit
/// register, or a tied-operand index.
constraints: String,
/// Name under which the operand is emitted (`name = ...`), if any.
name: Option<String>,
/// Original operand index and expression of the input side, if any.
in_expr: Option<(usize, CExprId)>,
/// Original operand index and expression of the output side, if any.
out_expr: Option<(usize, CExprId)>,
}
impl BidirAsmOperand {
    /// An operand is positional when it neither names an explicit register
    /// (a `"` in its constraint) nor carries a name.
    fn is_positional(&self) -> bool {
        let explicit_register = self.constraints.contains('"');
        let named = self.name.is_some();
        !(explicit_register || named)
    }

    /// Reports whether the output or the input side of this operand carries
    /// the original operand index `orig_idx`.
    fn has_orig_idx(&self, orig_idx: usize) -> bool {
        [self.out_expr, self.in_expr]
            .iter()
            .flatten()
            .any(|(idx, _)| *idx == orig_idx)
    }
}
/// Checks whether a `"`-quoted explicit-register constraint names a register
/// that cannot be used directly as an `asm!` operand on `arch`.
///
/// Returns `Some((reg, modifier))`, where `reg` is the register name without
/// quotes and `modifier` is the template modifier that selects the same
/// register width on a substitute operand. Returns `None` when the register
/// is not reserved or the architecture has no reserved registers listed
/// here.
fn reg_is_reserved(constraint: &str, arch: Arch) -> Option<(&str, &str)> {
    match arch {
        Arch::X86 => match constraint {
            "\"esi\"" | "\"si\"" => {
                let reg = constraint.trim_matches('"');
                // Everything before the trailing `si`: `e` for `esi`, empty
                // for `si`.
                let mods = &reg[..reg.len() - 2];
                Some((reg, mods))
            }
            _ => None,
        },
        Arch::X86_64 => match constraint {
            "\"bl\"" | "\"bh\"" | "\"bx\"" | "\"ebx\"" | "\"rbx\"" => {
                let reg = constraint.trim_matches('"');
                let mods = if reg.len() == 2 {
                    // `bl`/`bh`/`bx`: the width is the suffix.
                    &reg[1..]
                } else {
                    // `ebx`/`rbx`: the width is the prefix.
                    &reg[..1]
                };
                Some((reg, mods))
            }
            _ => None,
        },
        Arch::Arm | Arch::Aarch64 | Arch::Riscv => None,
    }
}
/// Replaces operands bound to reserved registers with named `reg` operands
/// and returns the `(prolog, epilog)` assembly that moves values between the
/// reserved register and the substitute operand.
///
/// Each rewritten operand gets the name `restmpN` and the constraint `reg`,
/// and is swapped to the position directly after the positional operands (and
/// after previously rewritten ones). The prolog ends with a newline and the
/// epilog starts with one, so both can be concatenated directly around the
/// assembly body.
///
/// `operands` is expected to list all positional operands first.
fn rewrite_reserved_reg_operands(
    att_syntax: bool,
    arch: Arch,
    operands: &mut [BidirAsmOperand],
) -> (String, String) {
    let (mut prolog, mut epilog) = (String::new(), String::new());

    // Collect the operands to rewrite up front; register name and modifier
    // are copied because the operand is mutated below.
    let mut rewrite_idxs = vec![];
    let mut total_positional = 0;
    for (i, operand) in operands.iter().enumerate() {
        if operand.is_positional() {
            total_positional += 1;
        } else if let Some((reg, mods)) = reg_is_reserved(&operand.constraints, arch) {
            rewrite_idxs.push((i, reg.to_owned(), mods.to_owned()));
        }
    }

    for (n_moved, (idx, reg, mods)) in rewrite_idxs.into_iter().enumerate() {
        let operand = &mut operands[idx];
        let name = format!("restmp{}", n_moved);

        // NOTE(review): with `mov src, dst` (AT&T) / `mov dst, src` (Intel)
        // these moves copy the reserved register into the substitute operand
        // before the body and back after it; confirm this is the intended
        // direction for inputs and outputs respectively.
        if operand.in_expr.is_some() {
            let move_input = if att_syntax {
                format!("mov %{}, {{{}:{}}}\n", reg, name, mods)
            } else {
                // The newline terminates the instruction; it must come after
                // the second operand, not before the comma.
                format!("mov {{{}:{}}}, {}\n", name, mods, reg)
            };
            prolog.push_str(&move_input);
        }
        if operand.out_expr.is_some() {
            let move_output = if att_syntax {
                format!("\nmov {{{}:{}}}, %{}", name, mods, reg)
            } else {
                format!("\nmov {}, {{{}:{}}}", reg, name, mods)
            };
            epilog.push_str(&move_output);
        }

        operand.constraints = "reg".into();
        operand.name = Some(name);

        // Keep rewritten operands grouped right behind the positional ones.
        let nth_non_positional = total_positional + n_moved;
        operands.swap(idx, nth_non_positional);
    }
    (prolog, epilog)
}
/// Strips comments from an assembly string.
///
/// C-style `/* ... */` comments are removed first, including their
/// terminator; an unterminated comment extends to the end of the string.
/// Afterwards everything from a `#` to the end of its line is removed. Every
/// line of the result is terminated with `\n`.
fn remove_comments(mut asm: &str) -> String {
    const OPEN: &str = "/*";
    const CLOSE: &str = "*/";

    let mut without_c_comments = String::with_capacity(asm.len());
    while let Some(comment_begin) = asm.find(OPEN) {
        without_c_comments.push_str(&asm[..comment_begin]);
        // Search for the terminator only after the opener so that `/*/` is
        // not mistaken for a complete comment, and resume after the
        // terminator so that `*/` does not leak into the output.
        let comment_body = &asm[comment_begin + OPEN.len()..];
        asm = match comment_body.find(CLOSE) {
            Some(close) => &comment_body[close + CLOSE.len()..],
            None => "",
        };
    }
    // Whatever follows the final comment.
    without_c_comments.push_str(asm);

    let mut without_comments = String::with_capacity(without_c_comments.len());
    for line in without_c_comments.lines() {
        let code = match line.find('#') {
            Some(line_comment_idx) => &line[..line_comment_idx],
            None => line,
        };
        without_comments.push_str(code);
        without_comments.push('\n');
    }
    without_comments
}
/// Guesses whether an x86 assembly string is written in AT&T syntax
/// (`true`) or Intel syntax (`false`).
///
/// Explicit `.intel_syntax` / `.att_syntax` directives take precedence; if
/// both occur, the one appearing first decides. Without directives the
/// decision is made from characteristic tokens, defaulting to AT&T.
fn asm_is_att_syntax(asm: &str) -> bool {
    let stripped = remove_comments(asm);
    let asm = stripped.as_str();

    match (asm.find(".intel_syntax"), asm.find(".att_syntax")) {
        (Some(intel_pos), Some(att_pos)) => att_pos < intel_pos,
        (Some(_), None) => false,
        (None, Some(_)) => true,
        (None, None) => {
            // `word ptr` only occurs in Intel syntax.
            if asm.contains("word ptr") {
                return false;
            }
            // `$$`, `%` and `(` are taken as AT&T markers; failing that, a
            // `[` is taken as an Intel marker.
            let has_att_marker = asm.contains("$$") || asm.contains('%') || asm.contains('(');
            has_att_marker || !asm.contains('[')
        }
    }
}
/// Adjusts an operand index from the assembly template for tied input
/// operands.
///
/// Indices below `num_output_operands` are returned unchanged. For larger
/// indices, `idx - num_output_operands` is looked up in `tied_operands` under
/// the key `(position, false)`: if present, the mapped value is returned;
/// otherwise `idx` is reduced by the number of `(_, false)` keys whose
/// position is smaller.
///
/// NOTE(review): the lookup assumes input keys are stored by position among
/// the inputs (not offset by the number of outputs); confirm callers insert
/// keys with that convention.
fn tied_output_operand_idx(
    idx: usize,
    num_output_operands: usize,
    tied_operands: &HashMap<(usize, bool), usize>,
) -> usize {
    let input_pos = match idx.checked_sub(num_output_operands) {
        Some(input_pos) => input_pos,
        // Refers to an output operand.
        None => return idx,
    };

    if let Some(&out_idx) = tied_operands.get(&(input_pos, false)) {
        return out_idx;
    }

    let tied_inputs_before = tied_operands
        .keys()
        .filter(|(pos, is_out)| !*is_out && *pos < input_pos)
        .count();
    idx - tied_inputs_before
}
/// Rewrites operand references in an assembly template from the `$`-based
/// source syntax to the `{}`-based syntax of `asm!`.
///
/// * `$$` becomes a literal `$`.
/// * `$N` and `${N}` become `{M}`, where `M = input_op_mapper(N)`. If
///   `is_mem_only` returns `true` for the index text, the reference is
///   wrapped in `(...)` (AT&T) or `[...]` (Intel).
/// * `$xN` (a single-letter modifier directly before the index) and
///   `${N:mods}` become `{M:mods'}`, with each modifier translated by
///   `translate_modifier`; modifiers without a translation are dropped.
/// * A `$` followed by anything else is removed and the following text kept.
///
/// # Errors
///
/// Returns a generic translation error when an operand index cannot be
/// parsed as an integer.
fn rewrite_asm<F: Fn(&str) -> bool, M: Fn(usize) -> usize>(
    asm: &str,
    att_syntax: bool,
    input_op_mapper: M,
    is_mem_only: F,
    arch: Arch,
) -> TranslationResult<String> {
    let (mem_open, mem_close) = if att_syntax { ('(', ')') } else { ('[', ']') };

    let mut out = String::with_capacity(asm.len());
    let mut first = true;
    let mut last_empty = false;
    for chunk in asm.split('$') {
        // Text before the first `$` is copied verbatim.
        if first {
            first = false;
            out.push_str(chunk);
            continue;
        }
        // Second half of a `$$` escape: emit one literal `$`.
        if last_empty {
            last_empty = false;
            out.push('$');
            out.push_str(chunk);
            continue;
        }
        // First half of a `$$` escape (or a trailing `$`).
        if chunk.is_empty() {
            last_empty = true;
            continue;
        }

        // Braced reference: `${N:mods}` or `${N}`.
        if chunk.starts_with('{') {
            if let Some(end_idx) = chunk.find('}') {
                let ref_str = &chunk[..end_idx];
                if let Some(colon_idx) = ref_str.find(':') {
                    let (before_mods, _modifiers) = ref_str.split_at(colon_idx + 1);
                    out.push('{');
                    let idx: usize = before_mods
                        .trim_matches(|c: char| !c.is_ascii_digit())
                        .parse()
                        .map_err(|_| TranslationError::generic("could not parse operand idx"))?;
                    out.push_str(input_op_mapper(idx).to_string().as_str());
                    out.push(':');
                    let modifiers = ref_str[colon_idx + 1..].chars();
                    for modifier in modifiers {
                        if let Some(new) = translate_modifier(modifier, arch) {
                            out.push(new);
                        }
                    }
                    // Closing brace plus the rest of the chunk.
                    out.push_str(&chunk[end_idx..]);
                } else {
                    // `${N}` carries no modifiers and is treated exactly like
                    // `$N`; without this branch the reference and the text
                    // following it would be lost.
                    let index_str = &ref_str[1..];
                    let idx: usize = index_str
                        .parse()
                        .map_err(|_| TranslationError::generic("could not parse operand idx"))?;
                    let mem_only = is_mem_only(index_str);
                    if mem_only {
                        out.push(mem_open);
                    }
                    out.push('{');
                    out.push_str(input_op_mapper(idx).to_string().as_str());
                    out.push('}');
                    if mem_only {
                        out.push(mem_close);
                    }
                    out.push_str(&chunk[end_idx + 1..]);
                }
            } else {
                out.push_str(chunk);
            }
            continue;
        }

        // Bare reference: `$N` or `$xN` with a one-letter modifier.
        if chunk.starts_with(|c: char| c.is_ascii_alphanumeric()) {
            let end_idx = chunk
                .find(|c: char| !c.is_ascii_alphanumeric())
                .unwrap_or(chunk.len());
            let ref_str = &chunk[..end_idx];
            let index_str;
            let mut new_modifiers = String::new();
            if ref_str.starts_with(|c: char| c.is_ascii_alphabetic()) {
                let (modifiers, index) = ref_str.split_at(1);
                index_str = index;
                for modifier in modifiers.chars() {
                    if let Some(new) = translate_modifier(modifier, arch) {
                        new_modifiers.push(new);
                    }
                }
            } else {
                index_str = ref_str;
            }
            let mem_only = is_mem_only(index_str);
            if mem_only {
                out.push(mem_open);
            }
            out.push('{');
            let idx: usize = index_str
                .parse()
                .map_err(|_| TranslationError::generic("could not parse operand idx"))?;
            out.push_str(input_op_mapper(idx).to_string().as_str());
            if !new_modifiers.is_empty() {
                out.push(':');
                out.push_str(&new_modifiers);
            }
            out.push('}');
            if mem_only {
                out.push(mem_close);
            }
            out.push_str(&chunk[end_idx..]);
            continue;
        }

        out.push_str(chunk);
    }
    Ok(out)
}
impl<'c> Translation<'c> {
pub fn convert_asm(
&self,
ctx: ExprContext,
span: Span,
is_volatile: bool,
asm: &str,
inputs: &[AsmOperand],
outputs: &[AsmOperand],
clobbers: &[String],
) -> TranslationResult<Vec<Stmt>> {
if !self.tcfg.translate_asm {
return Err(TranslationError::generic(
"Inline assembly translation not enabled.",
));
}
let arch = match parse_arch(&self.ast_context.target) {
Some(arch) => arch,
None => {
return Err(TranslationError::generic(
"Cannot translate inline assembly for unfamiliar architecture",
))
}
};
self.use_feature("asm");
fn push_expr(tokens: &mut Vec<TokenTree>, expr: Box<Expr>) {
tokens.extend(expr.to_token_stream());
}
let mut stmts: Vec<Stmt> = vec![];
let mut post_stmts: Vec<Stmt> = vec![];
let mut tokens: Vec<TokenTree> = vec![];
let mut tied_operands = HashMap::new();
for (input_idx, AsmOperand { constraints, .. }) in inputs.iter().enumerate() {
let constraints_digits = constraints.trim_matches(|c: char| !c.is_ascii_digit());
if let Ok(output_idx) = constraints_digits.parse::<usize>() {
let output_key = (output_idx, true);
let input_key = (input_idx + outputs.len(), false);
tied_operands.insert(output_key, input_idx);
tied_operands.insert(input_key, output_idx);
}
}
let operand_is_mem_only = |operand: &AsmOperand| -> bool {
if let Ok((_dir_spec, mem_only, _parsed)) =
parse_constraints(&operand.constraints, arch)
{
mem_only
} else {
println!("could not parse asm constraints: {}", operand.constraints);
false
}
};
let mut inputs_by_register = HashMap::new();
let mut other_inputs = Vec::new();
for (i, input) in inputs.iter().enumerate() {
let combined_idx = i + outputs.len();
let (_dir_spec, _mem_only, parsed) = parse_constraints(&input.constraints, arch)?;
if is_regname_or_int(&parsed) {
inputs_by_register.insert(parsed, (combined_idx, input.clone()));
} else {
other_inputs.push((parsed, (combined_idx, input.clone())));
}
}
let mut args = Vec::new();
for (output_idx, output) in outputs.iter().enumerate() {
match parse_constraints(&output.constraints, arch) {
Ok((mut dir_spec, mem_only, parsed)) => {
let mut in_expr = inputs_by_register.remove(&parsed);
if in_expr.is_none() {
in_expr = inputs_by_register.remove(&output_idx.to_string());
}
let in_expr = in_expr.map(|(i, operand)| (i, operand.expression));
if in_expr.is_some() {
dir_spec = dir_spec.with_in();
}
args.push(BidirAsmOperand {
dir_spec,
mem_only,
name: None,
constraints: parsed,
in_expr,
out_expr: Some((output_idx, output.expression)),
});
}
Err(e) => eprintln!("{}", e),
}
}
for (_, (input_idx, input)) in inputs_by_register
.into_iter()
.chain(other_inputs.into_iter())
{
let (dir_spec, mem_only, parsed) = match parse_constraints(&input.constraints, arch) {
Ok(x) => x,
Err(e) => {
eprintln!("{}", e);
continue;
}
};
args.push(BidirAsmOperand {
dir_spec,
mem_only,
name: None,
constraints: parsed,
in_expr: Some((input_idx, input.expression)),
out_expr: None,
});
}
let att_syntax = match arch {
Arch::X86 | Arch::X86_64 => asm_is_att_syntax(asm),
_ => false,
};
args.sort_by_key(|arg| !arg.is_positional());
let (prolog, epilog) = rewrite_reserved_reg_operands(att_syntax, arch, &mut args);
let new_idx_for_orig = |orig_idx| {
args.iter()
.position(|operand| operand.has_orig_idx(orig_idx))
.unwrap_or_else(|| panic!("no operand had index {orig_idx} in asm str:\n{asm}"))
};
let rewritten_asm = rewrite_asm(
asm,
att_syntax,
|idx: usize| {
new_idx_for_orig(tied_output_operand_idx(idx, outputs.len(), &tied_operands))
},
|ref_str: &str| {
if let Ok(idx) = ref_str.parse::<usize>() {
outputs
.iter()
.chain(inputs.iter())
.nth(idx)
.map(operand_is_mem_only)
.unwrap_or(false)
} else {
false
}
},
arch,
)?;
let rewritten_asm = prolog + &rewritten_asm + &epilog;
for line in rewritten_asm.split('\n') {
push_expr(&mut tokens, mk().lit_expr(line.to_string() + "\n"));
tokens.push(TokenTree::Punct(Punct::new(',', Alone)));
}
tokens.pop();
let mut operand_renames = HashMap::new();
for operand in args {
tokens.push(TokenTree::Punct(Punct::new(',', Alone)));
let out_expr = if let Some((output_idx, out_expr)) = operand.out_expr {
let mut out_expr = self.convert_expr(ctx.used(), out_expr, None)?;
stmts.append(out_expr.stmts_mut());
let mut out_expr = out_expr.into_value();
if operand.mem_only {
out_expr = mk().mutbl().borrow_expr(out_expr);
}
if let Some(_tied_operand) = tied_operands.get(&(output_idx, true)) {
let output_name = self.renamer.borrow_mut().fresh();
let output_local = mk().local(
mk().ident_pat(&output_name),
None,
Some(mk().mutbl().borrow_expr(out_expr)),
);
stmts.push(mk().local_stmt(Box::new(output_local)));
let inner_name = self.renamer.borrow_mut().fresh();
let inner_local = mk().local(mk().ident_pat(&inner_name), None, None);
stmts.push(mk().local_stmt(Box::new(inner_local)));
out_expr = mk().ident_expr(&inner_name);
operand_renames.insert(output_idx, (output_name, inner_name));
}
Some(out_expr)
} else {
None
};
let in_expr = if let Some((input_idx, in_expr)) = operand.in_expr {
let mut in_expr = self.convert_expr(ctx.used(), in_expr, None)?;
stmts.append(in_expr.stmts_mut());
let mut in_expr = in_expr.into_value();
if operand.mem_only {
in_expr = mk().borrow_expr(in_expr);
}
if let Some(tied_operand) = tied_operands.get(&(input_idx, false)) {
self.use_crate(ExternCrate::C2RustAsmCasts);
self.with_cur_file_item_store(|item_store| {
item_store.add_use(true, vec!["c2rust_asm_casts".into()], "AsmCastTrait");
});
let (output_name, inner_name) = operand_renames.get(tied_operand).unwrap();
let input_name = self.renamer.borrow_mut().fresh();
let input_local = mk().local(mk().ident_pat(&input_name), None, Some(in_expr));
stmts.push(mk().local_stmt(Box::new(input_local)));
let path_expr = mk().path_expr(vec!["c2rust_asm_casts", "AsmCast", "cast_in"]);
let output = mk().ident_expr(output_name);
let input = mk().ident_expr(input_name);
in_expr = mk().call_expr(path_expr, vec![output.clone(), input.clone()]);
let path_expr = mk().path_expr(vec!["c2rust_asm_casts", "AsmCast", "cast_out"]);
let inner = mk().ident_expr(inner_name);
let cast_out = mk().call_expr(path_expr, vec![output, input, inner]);
post_stmts.push(mk().semi_stmt(cast_out));
}
Some(in_expr)
} else {
None
};
if let Some(name) = operand.name {
push_expr(&mut tokens, mk().ident_expr(name));
tokens.push(TokenTree::Punct(Punct::new('=', Alone)));
}
push_expr(&mut tokens, mk().ident_expr(operand.dir_spec.to_string()));
let constraints_ident = if is_regname_or_int(&operand.constraints) {
mk().lit_expr(operand.constraints.trim_matches('"'))
} else {
mk().ident_expr(operand.constraints)
};
push_expr(&mut tokens, mk().paren_expr(constraints_ident));
if let Some(in_expr) = in_expr {
let in_expr_span = in_expr.span();
push_expr(&mut tokens, in_expr);
if out_expr.is_some() {
tokens.push(TokenTree::Punct(Punct::new('=', Joint)));
tokens.push(TokenTree::Punct(Punct::new('>', Alone)));
} else {
if let ArgDirSpec::InOut | ArgDirSpec::InLateOut = operand.dir_spec {
tokens.push(TokenTree::Punct(Punct::new('=', Joint)));
tokens.push(TokenTree::Punct(Punct::new('>', Alone)));
tokens.push(TokenTree::Ident(Ident::new("_", in_expr_span)));
}
}
}
if let Some(out_expr) = out_expr {
push_expr(&mut tokens, out_expr);
}
}
let mut preserves_flags = true;
let mut read_only = true;
for clobber in clobbers {
if clobber == "cc" {
preserves_flags = false;
continue;
};
if clobber == "memory" {
read_only = false;
continue;
};
let quoted = format!("\"{}\"", clobber);
if reg_is_reserved("ed, arch).is_some() {
warn!(
"Attempting to clobber reserved register ({}), dropping clobber! \
This likely means the potential for miscompilation has been introduced. \
Please rewrite this assembly to save/restore the value of this register \
if at all possible.",
clobber
);
continue;
}
tokens.push(TokenTree::Punct(Punct::new(',', Alone)));
let result = mk().call_expr(mk().ident_expr("out"), vec![mk().lit_expr(clobber)]);
push_expr(&mut tokens, result);
push_expr(&mut tokens, mk().ident_expr("_"));
}
{
let mut options = vec![];
if preserves_flags {
options.push(mk().ident_expr("preserves_flags"));
}
if !is_volatile {
if read_only && (outputs.len() + clobbers.len()) > 0 {
options.push(mk().ident_expr("pure"));
options.push(mk().ident_expr("readonly"));
}
}
if att_syntax {
options.push(mk().ident_expr("att_syntax"));
}
if !options.is_empty() {
tokens.push(TokenTree::Punct(Punct::new(',', Alone)));
let result = mk().call_expr(mk().ident_expr("options"), options);
push_expr(&mut tokens, result);
}
}
self.with_cur_file_item_store(|item_store| {
item_store.add_use(true, vec!["core".into(), "arch".into()], "asm");
});
let mac = mk().mac(
mk().path(vec!["asm"]),
tokens.into_iter().collect::<TokenStream>(),
MacroDelimiter::Paren(Default::default()),
);
let mac = mk().mac_expr(mac);
let mac = mk().span(span).semi_stmt(mac);
stmts.push(mac);
stmts.extend(post_stmts);
Ok(stmts)
}
}