mktool 1.5.8

General purpose utility to enhance pkgsrc/mk infrastructure
/*
 * Copyright (c) 2024 Jonathan Perkin <jonathan@perkin.org.uk>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

use crate::build_thread_pool;
use clap::Args;
use elf::ElfBytes;
use elf::endian::AnyEndian;
use rayon::prelude::*;
use std::collections::HashSet;
use std::fs;
use std::io::{self, BufRead, BufReader};
use std::path::PathBuf;
use std::process::{Command, Stdio};

/*
 * Per-file state for a single conversion.  One of these is created for
 * every input file so that the parallel workers can record failures
 * without sharing any other mutable state.
 */
struct CTFFile {
    /* Path of the input file to be converted. */
    path: PathBuf,
    /*
     * Error recorded while processing this file.  Starts out empty, and
     * an empty string means that no error was recorded.
     */
    error: String,
}

/*
 * Command line arguments for the ctfconvert subcommand.
 *
 * Plain comments are used here rather than doc comments, as clap's derive
 * support treats doc comments as help text.
 */
#[derive(Args, Debug)]
pub struct CTFConvert {
    /* The ctfconvert program that is executed once for each input file. */
    #[arg(short = 'c', value_name = "ctfconvert")]
    #[arg(help = "Path to ctfconvert command")]
    ctfconvert: PathBuf,

    /*
     * File containing the list of files to convert, one path per line.
     * When not specified there are no input files and nothing is done.
     */
    #[arg(short = 'I', value_name = "input")]
    #[arg(help = "Read files from input file (\"-\" for stdin)")]
    input: Option<PathBuf>,

    /*
     * Passed as-is to build_thread_pool().  Presumably the MKTOOL_JOBS
     * fallback mentioned in the help text is handled there.
     */
    #[arg(short = 'j', value_name = "jobs")]
    #[arg(help = "Maximum number of threads (or \"MKTOOL_JOBS\" env var)")]
    jobs: Option<usize>,

    /*
     * Leading path removed from file names when they are printed as
     * converted files or as the prefix for ctfconvert stderr output.
     */
    #[arg(short = 's', value_name = "prefix")]
    #[arg(help = "Prefix to strip from output")]
    strip_prefix: PathBuf,
}

impl CTFConvert {
    /*
     * Run ctfconvert over every existing file listed in the -I input, and
     * replace each input file with its converted output when that output
     * contains CTF data.
     *
     * Returns Ok(0) when there were no input files or when no file recorded
     * an error, and Ok(1) when at least one file recorded an error (each one
     * is reported on stderr).  A failure to open or read the input list, or
     * to build the thread pool, is returned as Err.
     */
    pub fn run(&self) -> Result<i32, Box<dyn std::error::Error>> {
        /*
         * Add files passed in via -I (supporting stdin if set to "-").  The
         * path is compared and opened as-is, so an input list with a
         * non-unicode file name is supported too.  A HashSet is used so that
         * duplicate entries are only processed once.
         */
        let mut inputfiles: HashSet<PathBuf> = HashSet::new();

        if let Some(infile) = &self.input {
            let reader: Box<dyn io::BufRead> = if infile.as_os_str() == "-" {
                Box::new(io::stdin().lock())
            } else {
                Box::new(BufReader::new(fs::File::open(infile)?))
            };
            for line in reader.lines() {
                let file = PathBuf::from(line?);
                /* Entries that do not exist are silently ignored. */
                if file.exists() {
                    inputfiles.insert(file);
                }
            }
        }

        /*
         * No input files, return early.
         */
        if inputfiles.is_empty() {
            return Ok(0);
        }

        /*
         * Create Vec of CTFFiles for parallel processing.
         */
        let mut inputfiles: Vec<CTFFile> = inputfiles
            .into_iter()
            .map(|path| CTFFile { path, error: String::new() })
            .collect();

        let pool = build_thread_pool(self.jobs)?;

        /*
         * The behaviour of ctfconvert with the "-m" flag that we use is as
         * follows:
         *
         *  - A successful conversion exits 0 with an output file but no
         *    stdout.
         *
         *  - A successful conversion with some issues exits 0 with warnings
         *    on stderr (for example "WARNING: file putenv.c is missing debug
         *    information")
         *
         *  - A failed conversion of a file that already contains CTF data
         *    exits 0 with no output and no output file.
         *
         *  - A failed conversion of a file that does not contain debug data
         *    exits 0 with no output and no output file.
         *
         *  - Attempting to convert a non-binary file (e.g. a shell script)
         *    exits 1 with some stderr.
         *
         * Thus we do the following:
         *
         *   - First check that the file is valid ELF, and skip all others.
         *
         *   - Send all stderr from ctfconvert back to stderr, with the
         *     filename added as a prefix.
         *
         *   - Ignore the exit status of ctfconvert, just test for the
         *     presence of an output file.
         *
         *   - If the output file contains .SUNW_ctf then move it over the
         *     input file, and only once that has succeeded print the file
         *     to stdout to indicate a successful conversion.  Otherwise
         *     record an error.
         */
        pool.install(|| {
            inputfiles.par_iter_mut().for_each(|file| {
                let infile = match fs::read(&file.path) {
                    Ok(f) => f,
                    Err(e) => {
                        file.error = format!("unable to read file: {e}");
                        return;
                    }
                };
                /* Not an ELF file, skip without recording an error. */
                if ElfBytes::<AnyEndian>::minimal_parse(&infile).is_err() {
                    return;
                }

                /*
                 * Temporary output file, alongside the input file with
                 * ".ctf" appended to the name.
                 */
                let mut outfile = PathBuf::from(&file.path);
                if let Some(fname) = outfile.file_name() {
                    let mut newname = fname.to_os_string();
                    newname.push(".ctf");
                    outfile.set_file_name(newname);
                }

                let cmd = match Command::new(&self.ctfconvert)
                    .arg("-m")
                    .arg("-o")
                    .arg(&outfile)
                    .arg(&file.path)
                    .stdout(Stdio::piped())
                    .stderr(Stdio::piped())
                    .spawn()
                {
                    Ok(c) => c,
                    Err(e) => {
                        file.error =
                            format!("unable to execute ctfconvert: {e}");
                        return;
                    }
                };
                let cmd = match cmd.wait_with_output() {
                    Ok(c) => c,
                    Err(e) => {
                        file.error = format!("ctfconvert failed: {e}");
                        let _ = fs::remove_file(&outfile);
                        return;
                    }
                };

                /*
                 * The input files are usually ${DESTDIR}${PREFIX}/... and so the
                 * -s flag allows that prefix to be pruned for cleaner output.
                 * Paths that are not under the prefix are printed in full.
                 */
                let filename = file
                    .path
                    .strip_prefix(&self.strip_prefix)
                    .unwrap_or(file.path.as_path())
                    .display();

                /*
                 * Print conversion errors to stderr, prefixed with the file
                 * that caused them for easier diagnosis.
                 */
                let stderr = String::from_utf8_lossy(&cmd.stderr);
                for line in stderr.lines() {
                    eprintln!("{filename}: {line}");
                }

                /*
                 * No output file means that ctfconvert declined to convert
                 * this file, which is not treated as an error.
                 */
                if !outfile.exists() {
                    return;
                }

                let out = match fs::read(&outfile) {
                    Ok(f) => f,
                    Err(e) => {
                        file.error = format!("unable to read output: {e}");
                        let _ = fs::remove_file(&outfile);
                        return;
                    }
                };
                let elf = match ElfBytes::<AnyEndian>::minimal_parse(
                    out.as_slice(),
                ) {
                    Ok(e) => e,
                    Err(e) => {
                        file.error = format!("output is not valid ELF: {e}");
                        let _ = fs::remove_file(&outfile);
                        return;
                    }
                };
                match elf.section_header_by_name(".SUNW_ctf") {
                    /*
                     * Only report the file as converted once the output
                     * has actually replaced it, so that a failed rename
                     * is not listed on stdout as a success.
                     */
                    Ok(Some(_)) => match fs::rename(&outfile, &file.path) {
                        Ok(()) => println!("{filename}"),
                        Err(e) => {
                            file.error =
                                format!("unable to rename output: {e}");
                            let _ = fs::remove_file(&outfile);
                        }
                    },
                    Ok(None) => {
                        file.error =
                            "output does not contain CTF data".to_string();
                        let _ = fs::remove_file(&outfile);
                    }
                    Err(e) => {
                        file.error =
                            format!("unable to parse ELF sections: {e}");
                        let _ = fs::remove_file(&outfile);
                    }
                }
            });
        });

        /*
         * Report any errors recorded by the workers, using the full path of
         * the file, and exit non-zero if there were any.
         */
        let mut rv = 0;
        for file in &inputfiles {
            if !file.error.is_empty() {
                eprintln!("{}: {}", file.path.display(), file.error);
                rv = 1;
            }
        }

        Ok(rv)
    }
}