c2rust_transpile/
lib.rs

1#![allow(clippy::too_many_arguments)]
2
3mod diagnostics;
4
5pub mod build_files;
6pub mod c_ast;
7pub mod cfg;
8mod compile_cmds;
9pub mod convert_type;
10pub mod renamer;
11pub mod rust_ast;
12pub mod translator;
13pub mod with_stmts;
14
15use std::collections::HashSet;
16use std::fs::{self, File};
17use std::io;
18use std::io::prelude::*;
19use std::path::{Path, PathBuf};
20use std::process;
21
22use crate::compile_cmds::CompileCmd;
23use failure::Error;
24use itertools::Itertools;
25use log::{info, warn};
26use regex::Regex;
27use serde_derive::Serialize;
28pub use tempfile::TempDir;
29
30use crate::c_ast::Printer;
31use crate::c_ast::*;
32pub use crate::diagnostics::Diagnostic;
33use c2rust_ast_exporter as ast_exporter;
34
35use crate::build_files::{emit_build_files, get_build_dir, CrateConfig};
36use crate::compile_cmds::get_compile_commands;
37use crate::convert_type::RESERVED_NAMES;
38pub use crate::translator::ReplaceMode;
39use std::prelude::v1::Vec;
40
/// Pragmas reported for one translated file: each entry pairs a key with the
/// list of values collected for that key.
type PragmaVec = Vec<(&'static str, Vec<&'static str>)>;
/// Flattened, insertion-ordered set of `(key, value)` pragma pairs gathered
/// across all files of one link command.
type PragmaSet = indexmap::IndexSet<(&'static str, &'static str)>;
/// Insertion-ordered set of the extern crates required by translated code.
type CrateSet = indexmap::IndexSet<ExternCrate>;
/// Outcome of transpiling a single file: on success, the path of the written
/// Rust file plus its pragmas and extern crates; `Err(())` when the file was
/// skipped (the reason is logged at the point of failure).
type TranspileResult = Result<(PathBuf, PragmaVec, CrateSet), ()>;
45
/// How much of the C macro translation should be attempted.
///
/// The default is [`TranslateMacros::Conservative`].
#[derive(Default, Debug)]
pub enum TranslateMacros {
    /// Don't translate any macros.
    None,

    /// Translate the conservative subset of macros known to always work.
    #[default]
    Conservative,

    /// Try to translate more, but this is experimental and not guaranteed to work.
    ///
    /// For const-like macros, this works in some cases.
    /// For function-like macros, this doesn't really work at all yet.
    Experimental,
}
61
/// Configuration settings for the translation process
///
/// Field comments below describe only the uses visible in this file; many
/// options are presumably also consumed by the translator and build-file
/// modules.
#[derive(Debug)]
pub struct TranspilerConfig {
    // Debug output options
    /// Print the untyped Clang AST of each file to stdout.
    pub dump_untyped_context: bool,
    /// Print the typed Clang AST of each file to stdout.
    pub dump_typed_context: bool,
    /// Pretty-print the typed Clang AST of each file to stdout.
    pub pretty_typed_context: bool,
    pub dump_function_cfgs: bool,
    pub json_function_cfgs: bool,
    pub dump_cfg_liveness: bool,
    pub dump_structures: bool,
    /// Print the additional Clang arguments for each transpiled file.
    pub verbose: bool,
    /// Forwarded to the AST exporter when extracting the untyped AST.
    pub debug_ast_exporter: bool,

    // Options that control translation
    pub incremental_relooper: bool,
    pub fail_on_multiple: bool,
    /// Forwarded to the compile-commands loader.
    pub filter: Option<Regex>,
    pub debug_relooper_labels: bool,
    pub prefix_function_names: Option<String>,
    pub translate_asm: bool,
    pub use_c_loop_info: bool,
    pub use_c_multiple_info: bool,
    pub simplify_structures: bool,
    pub panic_on_translator_failure: bool,
    pub emit_modules: bool,
    /// When set, an invalid Clang AST causes a panic instead of being tolerated.
    pub fail_on_error: bool,
    pub replace_unsupported_decls: ReplaceMode,
    pub translate_valist: bool,
    /// When unset, input files whose output file already exists are skipped.
    pub overwrite_existing: bool,
    pub reduce_type_annotations: bool,
    /// Run the definition-reorganization step after emitting a fresh crate file
    /// (unless `disable_refactoring` is set).
    pub reorganize_definitions: bool,
    /// Passed to the diagnostics module at start-up.
    pub enabled_warnings: HashSet<Diagnostic>,
    pub emit_no_std: bool,
    /// When set, translated files are placed under the build directory's `src/`
    /// and the directory's final component provides the crate name; when unset,
    /// each output is written next to its input.
    pub output_dir: Option<PathBuf>,
    pub translate_const_macros: TranslateMacros,
    pub translate_fn_macros: TranslateMacros,
    /// Suppresses the definition-reorganization step entirely.
    pub disable_refactoring: bool,
    pub preserve_unused_functions: bool,
    /// Passed to the diagnostics module at start-up.
    pub log_level: log::LevelFilter,

    // Options that control build files
    /// Emit `Cargo.toml` and `lib.rs`
    pub emit_build_files: bool,
    /// Names of translation units containing main functions that we should make
    /// into binaries
    pub binaries: Vec<String>,
}
110
111impl TranspilerConfig {
112    fn binary_name_from_path(file: &Path) -> String {
113        let file = Path::new(file.file_stem().unwrap());
114        get_module_name(file, false, false, false).unwrap()
115    }
116
117    fn is_binary(&self, file: &Path) -> bool {
118        let module_name = Self::binary_name_from_path(file);
119        self.binaries.contains(&module_name)
120    }
121
122    fn check_if_all_binaries_used(
123        &self,
124        transpiled_modules: impl IntoIterator<Item = impl AsRef<Path>>,
125    ) -> bool {
126        let module_names = transpiled_modules
127            .into_iter()
128            .map(|module| Self::binary_name_from_path(module.as_ref()))
129            .collect::<HashSet<_>>();
130        let mut ok = true;
131        for binary in &self.binaries {
132            if !module_names.contains(binary) {
133                ok = false;
134                warn!("binary not used: {binary}");
135            }
136        }
137        if !ok {
138            let module_names = module_names.iter().format(", ");
139            info!("candidate modules for binaries are: {module_names}");
140        }
141        ok
142    }
143
144    fn crate_name(&self) -> String {
145        self.output_dir
146            .as_ref()
147            .and_then(|dir| dir.file_name())
148            .map(|fname| str_to_ident_checked(fname.to_string_lossy().as_ref(), true))
149            .unwrap_or_else(|| "c2rust_out".into())
150    }
151}
152
/// External crates that translated code may depend on.
///
/// The crate name, version requirement and `macro_use` flag for each variant
/// are given by the `From<ExternCrate>` conversion into `ExternCrateDetails`.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum ExternCrate {
    C2RustBitfields,
    C2RustAsmCasts,
    F128,
    NumTraits,
    Memoffset,
    Libc,
}
162
/// Serializable description of an extern crate dependency.
#[derive(Serialize)]
struct ExternCrateDetails {
    // Crate name as written by its publisher (may contain dashes).
    name: &'static str,
    // `name` with every dash replaced by an underscore.
    ident: String,
    // Flag stored alongside the crate; presumably controls whether a
    // `#[macro_use]` attribute is emitted -- confirm against the templates.
    macro_use: bool,
    // Version requirement string for the crate.
    version: &'static str,
}
170
171impl ExternCrateDetails {
172    fn new(name: &'static str, version: &'static str, macro_use: bool) -> Self {
173        Self {
174            name,
175            ident: name.replace('-', "_"),
176            macro_use,
177            version,
178        }
179    }
180}
181
182impl From<ExternCrate> for ExternCrateDetails {
183    fn from(extern_crate: ExternCrate) -> Self {
184        match extern_crate {
185            ExternCrate::C2RustBitfields => Self::new("c2rust-bitfields", "0.3", true),
186            ExternCrate::C2RustAsmCasts => Self::new("c2rust-asm-casts", "0.2", true),
187            ExternCrate::F128 => Self::new("f128", "0.2", false),
188            ExternCrate::NumTraits => Self::new("num-traits", "0.2", true),
189            ExternCrate::Memoffset => Self::new("memoffset", "0.5", true),
190            ExternCrate::Libc => Self::new("libc", "0.2", false),
191        }
192    }
193}
194
/// Maps `c` to a character usable inside an identifier: alphanumeric
/// characters are kept, everything else becomes an underscore.
fn char_to_ident(c: char) -> char {
    match c {
        ch if ch.is_alphanumeric() => ch,
        _ => '_',
    }
}
202
203fn str_to_ident(s: &str) -> String {
204    s.chars().map(char_to_ident).collect()
205}
206
207/// Make sure that name:
208/// - does not contain illegal characters,
209/// - does not clash with reserved keywords.
210fn str_to_ident_checked(s: &str, check_reserved: bool) -> String {
211    let s = str_to_ident(s);
212
213    // make sure the name does not clash with keywords
214    if check_reserved && RESERVED_NAMES.contains(&s.as_str()) {
215        format!("r#{}", s)
216    } else {
217        s
218    }
219}
220
221fn get_module_name(
222    file: &Path,
223    check_reserved: bool,
224    keep_extension: bool,
225    full_path: bool,
226) -> Option<String> {
227    let is_rs = file.extension().map(|ext| ext == "rs").unwrap_or(false);
228    let fname = if is_rs {
229        file.file_stem()
230    } else {
231        file.file_name()
232    };
233    let fname = fname.unwrap().to_str().unwrap();
234    let mut name = str_to_ident_checked(fname, check_reserved);
235    if keep_extension && is_rs {
236        name.push_str(".rs");
237    }
238    let file = if full_path {
239        file.with_file_name(name)
240    } else {
241        Path::new(&name).to_path_buf()
242    };
243    file.to_str().map(String::from)
244}
245
246pub fn create_temp_compile_commands(sources: &[PathBuf]) -> (TempDir, PathBuf) {
247    // If we generate the same path here on every run, then we can't run
248    // multiple transpiles in parallel, so we need a unique path. But clang
249    // won't read this file unless it is named exactly "compile_commands.json",
250    // so we can't change the filename. Instead, create a temporary directory
251    // with a unique name, and put the file there.
252    let temp_dir = tempfile::Builder::new()
253        .prefix("c2rust-")
254        .tempdir()
255        .expect("Failed to create temporary directory for compile_commands.json");
256    let temp_path = temp_dir.path().join("compile_commands.json");
257
258    let compile_commands: Vec<CompileCmd> = sources
259        .iter()
260        .map(|source_file| {
261            let absolute_path = fs::canonicalize(source_file)
262                .unwrap_or_else(|_| panic!("Could not canonicalize {}", source_file.display()));
263
264            CompileCmd {
265                directory: PathBuf::from("."),
266                file: absolute_path.clone(),
267                arguments: vec![
268                    "clang".to_string(),
269                    absolute_path.to_str().unwrap().to_owned(),
270                ],
271                command: None,
272                output: None,
273            }
274        })
275        .collect();
276
277    let json_content = serde_json::to_string(&compile_commands).unwrap();
278    let mut file =
279        File::create(&temp_path).expect("Failed to create temporary compile_commands.json");
280    file.write_all(json_content.as_bytes())
281        .expect("Failed to write to temporary compile_commands.json");
282    (temp_dir, temp_path)
283}
284
/// Main entry point to transpiler. Called from CLI tools with the result of
/// clap::App::get_matches().
///
/// Reads the compile commands from `cc_db`, transpiles every input file of
/// every link command, and, if `tcfg.emit_build_files` is set, emits build
/// files for each non-top-level link command and for the top level.
/// `extra_clang_args` are appended to the platform-specific Clang arguments.
///
/// # Panics
///
/// Panics if the compile commands cannot be parsed from `cc_db`.
pub fn transpile(tcfg: TranspilerConfig, cc_db: &Path, extra_clang_args: &[&str]) {
    diagnostics::init(tcfg.enabled_warnings.clone(), tcfg.log_level);

    let build_dir = get_build_dir(&tcfg, cc_db);

    let lcmds = get_compile_commands(cc_db, &tcfg.filter).unwrap_or_else(|_| {
        panic!(
            "Could not parse compile commands from {}",
            cc_db.to_string_lossy()
        )
    });

    // Specify path to system include dir on macOS 10.14 and later. Disable the blocks extension.
    let clang_args: Vec<String> = get_extra_args_macos();
    let mut clang_args: Vec<&str> = clang_args.iter().map(AsRef::as_ref).collect();
    clang_args.extend_from_slice(extra_clang_args);

    // State accumulated across all link commands.
    let mut top_level_ccfg = None;
    let mut workspace_members = vec![];
    let mut num_transpiled_files = 0;
    let mut transpiled_modules = Vec::new();

    for lcmd in &lcmds {
        let cmds = &lcmd.cmd_inputs;
        // Name this link command after the stem of its output file, falling
        // back to the crate name when it has no output.
        let lcmd_name = lcmd
            .output
            .as_ref()
            .map(|output| {
                let output_path = Path::new(output);
                output_path
                    .file_stem()
                    .unwrap()
                    .to_str()
                    .unwrap()
                    .to_owned()
            })
            .unwrap_or_else(|| tcfg.crate_name());
        // Shadows the outer `build_dir` for this iteration only: non-top-level
        // link commands get their own subdirectory.
        let build_dir = if lcmd.top_level {
            build_dir.to_path_buf()
        } else {
            build_dir.join(&lcmd_name)
        };

        // Compute the common ancestor of all input files
        // FIXME: this is quadratic-time in the length of the ancestor path
        let mut ancestor_path = cmds
            .first()
            .map(|cmd| {
                let mut dir = cmd.abs_file();
                dir.pop(); // discard the file part
                dir
            })
            .unwrap_or_else(PathBuf::new);
        if cmds.len() > 1 {
            for cmd in &cmds[1..] {
                let cmd_path = cmd.abs_file();
                // Shrink the candidate to its longest ancestor that is also a
                // prefix of this input; empty path if there is none.
                ancestor_path = ancestor_path
                    .ancestors()
                    .find(|a| cmd_path.starts_with(a))
                    .map(ToOwned::to_owned)
                    .unwrap_or_else(PathBuf::new);
            }
        }

        // Transpile every input of this link command; failures are recorded
        // per file rather than aborting the loop.
        let results = cmds
            .iter()
            .map(|cmd| {
                transpile_single(
                    &tcfg,
                    &cmd.abs_file(),
                    &ancestor_path,
                    &build_dir,
                    cc_db,
                    &clang_args,
                )
            })
            .collect::<Vec<TranspileResult>>();
        let mut modules = vec![];
        let mut modules_skipped = false;
        let mut pragmas = PragmaSet::new();
        let mut crates = CrateSet::new();
        for res in results {
            match res {
                Ok((module, pragma_vec, crate_set)) => {
                    modules.push(module);
                    crates.extend(crate_set);

                    num_transpiled_files += 1;
                    // Flatten the per-key value lists into (key, value) pairs.
                    for (key, vals) in pragma_vec {
                        for val in vals {
                            pragmas.insert((key, val));
                        }
                    }
                }
                Err(_) => {
                    modules_skipped = true;
                }
            }
        }
        // Sort the collected sets (presumably so emitted build files are
        // stable across runs).
        pragmas.sort();
        crates.sort();

        transpiled_modules.extend(modules.iter().cloned());

        if tcfg.emit_build_files {
            if modules_skipped {
                // If we skipped a file, we may not have collected all required pragmas
                warn!("Can't emit build files after incremental transpiler run; skipped.");
                // NOTE: this leaves `transpile` entirely, so any remaining
                // link commands are not processed.
                return;
            }

            let ccfg = CrateConfig {
                crate_name: lcmd_name.clone(),
                modules,
                pragmas,
                crates,
                link_cmd: lcmd,
            };
            if lcmd.top_level {
                // Emitted after the loop, together with the workspace members.
                top_level_ccfg = Some(ccfg);
            } else {
                let crate_file = emit_build_files(&tcfg, &build_dir, Some(ccfg), None);
                reorganize_definitions(&tcfg, &build_dir, crate_file)
                    .unwrap_or_else(|e| warn!("Reorganizing definitions failed: {}", e));
                workspace_members.push(lcmd_name);
            }
        }
    }

    if num_transpiled_files == 0 {
        warn!("No C files found in compile_commands.json; nothing to do.");
        return;
    }

    // Here `build_dir` is the outer (top-level) build directory again.
    if tcfg.emit_build_files {
        let crate_file =
            emit_build_files(&tcfg, &build_dir, top_level_ccfg, Some(workspace_members));
        reorganize_definitions(&tcfg, &build_dir, crate_file)
            .unwrap_or_else(|e| warn!("Reorganizing definitions failed: {}", e));
    }

    // Only the logged warnings matter here; the boolean result is ignored.
    tcfg.check_if_all_binaries_used(&transpiled_modules);
}
430
/// Ensure that clang can locate the system headers on macOS 10.14+.
///
/// MacOS 10.14 does not have a `/usr/include` folder even if Xcode
/// or the command line developer tools are installed as explained in
/// this [thread](https://forums.developer.apple.com/thread/104296).
/// It is possible to install a package which puts the headers in
/// `/usr/include` but the user doesn't have to since we can find
/// the system headers we need by running `xcrun --show-sdk-path`.
///
/// Returns the extra Clang arguments for the current platform: empty on
/// anything but macOS; on macOS, `-fno-blocks`, preceded by
/// `-isystem <sdk>/usr/include` when `/usr/include` is missing and the SDK
/// path could be determined.
///
/// # Panics
///
/// Panics if the `xcrun` process cannot be started.
fn get_extra_args_macos() -> Vec<String> {
    let mut args = vec![];
    if cfg!(target_os = "macos") {
        let usr_incl = Path::new("/usr/include");
        if !usr_incl.exists() {
            let output = process::Command::new("xcrun")
                .args(["--show-sdk-path"])
                .output()
                .expect("failed to run `xcrun` subcommand");
            // Strip the trailing line terminator, whatever its length, instead
            // of blindly cutting one byte: that underflowed on empty output and
            // removed a real path character when no newline was present.
            let stdout = String::from_utf8_lossy(&output.stdout);
            let sdk_path = stdout.trim_end();

            if output.status.success() && !sdk_path.is_empty() {
                args.push("-isystem".to_owned());
                args.push(format!("{sdk_path}/usr/include"));
            } else {
                // Without a usable SDK path there is nothing sensible to pass;
                // clang will report missing headers if it cannot find them.
                eprintln!(
                    "warning: `xcrun --show-sdk-path` did not return an SDK path; \
                     not adding a system include directory"
                );
            }
        }

        // disable Apple's blocks extension; see https://github.com/immunant/c2rust/issues/229
        args.push("-fno-blocks".to_owned());
    }
    args
}
462
/// Placeholder for the refactoring step: currently ignores `_build_dir`,
/// performs no work and always returns `Ok(())`.
fn invoke_refactor(_build_dir: &Path) -> Result<(), Error> {
    Ok(())
}
466
467fn reorganize_definitions(
468    tcfg: &TranspilerConfig,
469    build_dir: &Path,
470    crate_file: Option<PathBuf>,
471) -> Result<(), Error> {
472    // We only run the reorganization refactoring if we emitted a fresh crate file
473    if crate_file.is_none() || tcfg.disable_refactoring || !tcfg.reorganize_definitions {
474        return Ok(());
475    }
476
477    invoke_refactor(build_dir)?;
478    // fix the formatting of the output of `c2rust-refactor`
479    let status = process::Command::new("cargo")
480        .args(["fmt"])
481        .current_dir(build_dir)
482        .status()?;
483    if !status.success() {
484        warn!("cargo fmt failed, code may not be well-formatted");
485    }
486    Ok(())
487}
488
489fn transpile_single(
490    tcfg: &TranspilerConfig,
491    input_path: &Path,
492    ancestor_path: &Path,
493    build_dir: &Path,
494    cc_db: &Path,
495    extra_clang_args: &[&str],
496) -> TranspileResult {
497    let output_path = get_output_path(tcfg, input_path, ancestor_path, build_dir);
498    if output_path.exists() && !tcfg.overwrite_existing {
499        warn!("Skipping existing file {}", output_path.display());
500        return Err(());
501    }
502
503    let file = input_path.file_name().unwrap().to_str().unwrap();
504    if !input_path.exists() {
505        warn!(
506            "Input C file {} does not exist, skipping!",
507            input_path.display()
508        );
509        return Err(());
510    }
511
512    if tcfg.verbose {
513        println!("Additional Clang arguments: {}", extra_clang_args.join(" "));
514    }
515
516    // Extract the untyped AST from the CBOR file
517    let untyped_context = match ast_exporter::get_untyped_ast(
518        input_path,
519        cc_db,
520        extra_clang_args,
521        tcfg.debug_ast_exporter,
522    ) {
523        Err(e) => {
524            warn!(
525                "Error: {}. Skipping {}; is it well-formed C?",
526                e,
527                input_path.display()
528            );
529            return Err(());
530        }
531        Ok(cxt) => cxt,
532    };
533
534    println!("Transpiling {}", file);
535
536    if tcfg.dump_untyped_context {
537        println!("CBOR Clang AST");
538        println!("{:#?}", untyped_context);
539    }
540
541    // Convert this into a typed AST
542    let typed_context = {
543        let conv = ConversionContext::new(input_path, &untyped_context);
544        if conv.invalid_clang_ast && tcfg.fail_on_error {
545            panic!("Clang AST was invalid");
546        }
547        conv.into_typed_context()
548    };
549
550    if tcfg.dump_typed_context {
551        println!("Clang AST");
552        println!("{:#?}", typed_context);
553    }
554
555    if tcfg.pretty_typed_context {
556        println!("Pretty-printed Clang AST");
557        println!("{:#?}", Printer::new(io::stdout()).print(&typed_context));
558    }
559
560    // Perform the translation
561    let (translated_string, pragmas, crates) =
562        translator::translate(typed_context, tcfg, input_path);
563
564    let mut file = match File::create(&output_path) {
565        Ok(file) => file,
566        Err(e) => panic!(
567            "Unable to open file {} for writing: {}",
568            output_path.display(),
569            e
570        ),
571    };
572
573    match file.write_all(translated_string.as_bytes()) {
574        Ok(()) => (),
575        Err(e) => panic!(
576            "Unable to write translation to file {}: {}",
577            output_path.display(),
578            e
579        ),
580    };
581
582    Ok((output_path, pragmas, crates))
583}
584
585fn get_output_path(
586    tcfg: &TranspilerConfig,
587    input_path: &Path,
588    ancestor_path: &Path,
589    build_dir: &Path,
590) -> PathBuf {
591    // When an output file name is not explicitly specified, we should convert files
592    // with dashes to underscores, as they are not allowed in rust file names.
593    let file_name = input_path
594        .file_name()
595        .unwrap()
596        .to_str()
597        .unwrap()
598        .replace('-', "_");
599
600    let mut input_path = input_path.with_file_name(file_name);
601    input_path.set_extension("rs");
602
603    if tcfg.output_dir.is_some() {
604        let path_buf = input_path
605            .strip_prefix(ancestor_path)
606            .expect("Couldn't strip common ancestor path");
607
608        // Place the source files in build_dir/src/
609        let mut output_path = build_dir.to_path_buf();
610        output_path.push("src");
611        for elem in path_buf.iter() {
612            let path = Path::new(elem);
613            let name = get_module_name(path, false, true, false).unwrap();
614            output_path.push(name);
615        }
616
617        // Create the parent directory if it doesn't exist
618        let parent = output_path.parent().unwrap();
619        if !parent.exists() {
620            fs::create_dir_all(parent).unwrap_or_else(|_| {
621                panic!("couldn't create source directory: {}", parent.display())
622            });
623        }
624        output_path
625    } else {
626        input_path
627    }
628}