bundle-sources 0.0.1

library and program for generating source code bundles (e.g. for AGPL compliance)
Documentation
// Copyright 2020 Ian Jackson
// SPDX-License-Identifier: GPL-3.0-or-later
// There is NO WARRANTY.

/// Unfortunately, this package is not yet documented.

// BUNDLE_RUST_SOURCES_TEST_OUTPUT=/home/rustcargo/Rustup/Tau/bundle-sources/d rustcargo test

mod imports;

pub mod immutable;
pub mod utils;

mod bundle;
mod html;

pub use crate::bundle::*;

use html::*;
pub use html::{Html,HtmlRef};

use crate::immutable::Immutable;
use crate::imports::*;
use crate::utils::*;

#[derive(Error,Debug)]
pub enum Error {
  #[error("output leafname {0:?} has bad syntax")]
  BadLeafBasename(String),

  #[error("output leafname {leafname:?} reused ({first} vs {second})")]
  LeafReused { leafname : String, first : String, second : String },

  #[error("full leafname {leafname:?} generated twice by same bundle from different basenames, now due to bundle {} - bundle id is not unique or seems to generate inconsistent suffix, or something?")]
  ComponentLeafGeneratedTwiceInconsistency
  { bundle_id : String, leafname : String },
}

use Error::*;

/// Collects source components into an output directory and, finally,
/// aggregates them into tarballs plus index files.
pub struct SourceGenerator {
  /// Directory into which every output file is written.
  output_dir : Immutable<String>,
  /// Settings consulted while adding cargo packages and aggregating.
  config : Config,
  /// Output items recorded so far, grouped by their category.
  items : BTreeMap<Category, Vec<OutputItem>>,
  /// Leafnames already claimed, mapped to the id of the claiming bundle.
  used_leaves : HashMap<String, String>,
}

/// Settings for a `SourceGenerator`.
pub struct Config {
  /// Whether `aggregate` also builds the "Comprehensive" tarball,
  /// which includes upstream sources.
  include_upstream : bool,
  /// Chooses the category for a cargo package, given its `source`.
  cargo_source_cat : Box<dyn FnMut(&Option<Source>) -> Result<Category,E>>,
  /// Whether `aggregate` removes the individual item files once the
  /// aggregates have been built.
  tidy : bool,
}

impl Default for Config {
  /// Default settings: upstream sources are included, tidying is on,
  /// and a cargo package counts as `Upstream` exactly when it has a
  /// source and that source is crates.io (otherwise `Modified`).
  fn default() -> Config {
    Config {
      include_upstream : true,
      tidy : true,
      cargo_source_cat : Box::new(|source : &Option<Source>| {
        let cat = match source {
          Some(src) if src.is_crates_io() => Upstream,
          _ => Modified,
        };
        Ok(cat)
      }),
    }
  }
}

impl Config {
  pub fn new() -> Config { Default::default() }
  pub fn include_upstream(&mut self, i : bool) -> &mut Self {
    self.include_upstream = i; self
  }
  pub fn tidy(&mut self, i : bool) -> &mut Self {
    self.tidy = i; self
  }
}

/// One file in the output directory, with its description.
#[derive(Debug,Clone,Serialize)]
pub struct OutputItem {
  /// File name, relative to the output directory.
  pub leafname : String,
  /// Description of the item, as HTML.
  pub desc : Html,
  /// Category under which the item is listed.
  cat : Category,
}

/// Classification of an output item.
///
/// The declaration order matters: `Ord` is derived, and aggregation
/// compares categories (`cat < mincat`) to decide what to include.
#[derive(Debug,Ord,PartialOrd,Eq,PartialEq,Copy,Clone,Hash)]
#[derive(Serialize,Deserialize)]
pub enum Category {
  /// Upstream sources included for completeness.
  Upstream,
  /// Mixed, other or unknown content.
  Mixed,
  /// Local sources, possibly modified.
  Modified,
  /// Metadata and index files.
  Metadata,
}
use Category::*;

impl SourceGenerator {
  //#[throws(E)]
  pub fn new_adding(output_dir : String) -> Result<SourceGenerator,E> {
    let output_dir = output_dir.to_owned();
    match create_dir(&output_dir) {
      Ok(_)                               => (),
      Err(e) if e.kind() == AlreadyExists => (),
      e @ Err(_) => e.with_context(|| format!(
        "new_adding: create output directory, if needed {:?}",
        &output_dir
      ))?
    };
    Ok( Self::new_return(output_dir) )
  }

  #[throws(E)]
  pub fn new_trashing(output_dir : String) -> SourceGenerator {
    let output_dir = output_dir.to_owned();
    match remove_dir_all(&output_dir) {
      Ok(_) => { },
      Err(e) if e.kind() == NotFound => { },
      e @ Err(_) => 
        e.cxm(||format!(
          "new_trashing: remove original output directory {:?}",
          &output_dir))?,
    }
    create_dir(&output_dir).cxm(||format!(
      "new_trashing: create new output directory {:?}",
      &output_dir))?;
    Self::new_return(output_dir)
  }

  /// Builds the generator value itself: no items, no used leafnames,
  /// and the default configuration.  Does not touch the filesystem.
  fn new_return(output_dir : String) -> SourceGenerator {
    SourceGenerator {
      output_dir : output_dir.into(),
      config : Config::new(),
      items : Default::default(),
      used_leaves : HashMap::new(),
    }
  }
  /// Replaces the whole configuration with `config`.
  ///
  /// Returns `self` so that further calls can be chained.
  pub fn configure(&mut self, config : Config) -> &mut Self {
    self.config = config;
    self
  }

  /// Returns the path of `sub` inside the output directory, formed as
  /// the output directory, a `/`, and then `sub`.
  fn output_file<T : AsRef<str>>(&self, sub : T) -> String {
    let mut path = self.output_dir.to_owned();
    path.push('/');
    path.push_str(sub.as_ref());
    path
  }

  #[throws(E)]
  /// Bundles the component `src` into the output directory and records
  /// the result under category `cat`.
  ///
  /// `basename` is the output leafname without any suffix; `src.bundle`
  /// may supply a suffix, which is appended to form the full leafname.
  /// If the same bundle (same `cat` and `src`) already claimed
  /// `basename`, nothing is done.
  ///
  /// `desc` should be a single line of text in HTML syntax
  pub fn add_component<C : Component>(&mut self,
            src : &C, basename : &str,
            cat : Category, desc : Html)
  {
    let baseleaf = basename.to_owned();
    let bundle_id = format!("{:?} {:?}", cat, src);
    if self.check_record_leaf_done(&baseleaf, &bundle_id)? { return }

    let basepath = self.output_file(&baseleaf);
    let (fullleaf, already_checked) = match src.bundle(&basepath)? {
      Some(suffix) => {
        let suffix : &str = suffix.as_ref();
        (baseleaf + suffix, false)
      }
      // Without a suffix the full leafname equals the basename, which
      // has been checked and recorded just above.
      None => (baseleaf, true),
    };
    self.add_bundle(fullleaf, cat, desc, bundle_id, already_checked)?;
  }

  #[throws(Error)]
  /// Validates `leafname` and records it as claimed by `bundle_id`.
  ///
  /// Returns `false` if the leafname was unclaimed (it is now recorded)
  /// and `true` if the same `bundle_id` had claimed it already.
  ///
  /// Throws `BadLeafBasename` for a leafname with bad syntax, and
  /// `LeafReused` if a different bundle id holds the leafname.
  fn check_record_leaf_done(&mut self, leafname : &str, bundle_id : &str)
                            -> bool {
    let bad_syntax =
      leafname.is_empty()
      || !leafname.starts_with(|c : char| c.is_ascii_lowercase())
      || leafname.contains('/')
      || leafname.split('.').next()
           .map_or(false, |stem| stem == INDEX_BASENAME);
    if bad_syntax {
      throw!(BadLeafBasename(leafname.to_owned()))
    }

    match self.used_leaves.entry(leafname.to_owned()) {
      Vacant(slot) => {
        slot.insert(bundle_id.to_owned());
        false
      }
      Occupied(slot) => {
        if slot.get() == bundle_id {
          true
        } else {
          throw!(LeafReused{
            leafname : slot.key().clone(),
            first : slot.get().clone(),
            second : bundle_id.to_owned(),
          })
        }
      }
    }
  }

  #[throws(Error)]
  /// Records an already-generated output file as an item of category
  /// `cat`.
  ///
  /// Unless `already_checked`, the full leafname is first validated and
  /// claimed for `bundle_id`; finding it already claimed by that same
  /// bundle id throws `ComponentLeafGeneratedTwiceInconsistency`.
  fn add_bundle(&mut self, fullleaf : String,
                cat : Category, desc : Html, bundle_id : String,
                already_checked: bool) {
    if !already_checked
      && self.check_record_leaf_done(&fullleaf, &bundle_id)?
    {
      throw!(ComponentLeafGeneratedTwiceInconsistency{
        bundle_id, leafname : fullleaf })
    }
    let item = OutputItem { leafname : fullleaf, desc, cat };
    self.items.entry(cat).or_default().push(item);
  }

  #[throws(E)]
  /// Adds the `cargo metadata` output and the source directory of every
  /// package it lists.
  ///
  /// `spec_manifest_path` names a `Cargo.toml`, or a directory
  /// containing one.  When it is given, the manifest is read to find
  /// the package name and version, and the metadata file is named
  /// `NAME=VERSION.cargo-metadata.json`.  When it is `None`, the
  /// manifest is `$CARGO_MANIFEST_DIR/Cargo.toml` (or plain
  /// `Cargo.toml` if that variable is unset) and the metadata file is
  /// named `cargo-metadata.json`.
  ///
  /// Each package is added as a component named `NAME=VERSION`, in the
  /// category chosen by the configured `cargo_source_cat`.
  pub fn add_cargo_packages(&mut self, spec_manifest_path : Option<&str>) {
    let manifest_path = match spec_manifest_path
      .map( |i| i.to_owned() )
    {
      Some(i) if i == "Cargo.toml" || i.ends_with("/Cargo.toml") => i,
      Some(i) => { i + "/Cargo.toml" },
      None => {
        match env::var("CARGO_MANIFEST_DIR") {
          Err(VarError::NotPresent) => "Cargo.toml".to_owned(),
          e @ Err(_) => e.cxm(||"looking up CARGO_MANIFEST_DIR".to_owned())?,
          Ok(d) => d + "/Cargo.toml",
        }
      }
    };

    /// Forms the leafname `NAME=VERSION`, rejecting any name or version
    /// containing characters other than ASCII alphanumerics and `._-+`.
    #[throws(E)]
    fn package_leafname(name : &str, version : &str) -> String {
      let check = |s : &str, what|{
        // Trimming the permitted characters off both ends leaves
        // something behind exactly when a forbidden character occurs.
        let badness = s.trim_matches(
          |c : char| c=='.' || c=='_' || c=='-' || c=='+' ||
                     c.is_ascii_alphanumeric()
        );
        if badness.len() != 0 {
          Err(anyhow!("package {:?} {} bad syntax ({:?})",
                      name, what, badness))?;
        }
        <Result<(),E>>::Ok(())
      };
      check(version,"version")?;
      check(name,"name")?;
      format!("{}={}", name, version)
    }
  
    let (metafile_leafname, metafile_desc) = if spec_manifest_path.is_none() {
      ("cargo-metadata.json".to_owned(),
       Html("Rust cargo metadatafile".to_owned()))
    } else {
      // Immediately-invoked closure, so that every failure in here gets
      // the same "read manifest" context attached.
      (||{
        let manifest_data = fs::read_to_string(&manifest_path)?;
        let manifest_data = manifest_data.parse()?;
        let getkey
          : for<'v> fn(&'v toml::value::Value,&'_ str) -> Result<&'v _,E>
          = |d,k| {
            d.get(k).ok_or_else(
              ||anyhow!(r#"missing "{}" in Cargo.toml"#, k))
          };
        let getstr = |d, k| {
          getkey(d,k)?.as_str().ok_or_else(
            ||anyhow!(r#"value for "{}" is not a string"#, k))
        };
        let package = getkey(&manifest_data,"package")?;
        let name = getstr(package,"name")?;
        let version = getstr(package,"version")?;
        let metafile_base = package_leafname(&name, &version)?;
        <Result<_,E>>::Ok((
          metafile_base + ".cargo-metadata.json",
          Html(format!("Rust cargo metadatafile for package {}",
                       Html::from_literal(&name).0))
        ))
      })().cxm(||format!("read manifest {:?} to find package name and version",
                         manifest_path))?
    };

    let metafile_path = self.output_file(&metafile_leafname);

    // `other_options` replaces any options set by an earlier call, so
    // both flags have to be passed together in a single call.
    let mut cmd = MetadataCommand::new()
      .manifest_path(&manifest_path)
      .other_options(vec!["--offline".to_string(),
                          "--locked".to_string()])
      .cargo_command()?;
    // cargo's stdout goes straight into the metadata file.
    cmd
      .stdout(File::create(&metafile_path)?);
    // NOTE(review): with CARGO_HOME set, cargo runs with that directory
    // as its cwd, so a relative manifest path would be resolved against
    // it - confirm that this is intended.
    if let Some(cd) = env::var_os("CARGO_HOME") {
      cmd.current_dir(cd);
    }
    let st = cmd
      .status()?;

    let bundle_id = format!("add_cargo_packages(manifest_path={:?})",
                            &manifest_path);

    // The metadata file is recorded before the exit status is checked.
    self.add_bundle(metafile_leafname, Metadata,
                    metafile_desc, bundle_id, false)?;

    check_exit_status(st).context("cargo metadata failed")?;

    let metadata = fs::read_to_string(metafile_path)
      .context("cannot read metadata file")?;
    let mut metadata = MetadataCommand::parse(&metadata)?;

    // Process packages in a fixed order: by name, then version, then id.
    metadata.packages.sort_by(|a,b|{
      let gf : fn(&_) -> (&_,&_,&_) =
        |p : &Package| (&p.name, &p.version, &p.id);
      gf(a).cmp(&gf(b))
    });
    for package in &metadata.packages {
      // The component to bundle is the directory holding the manifest.
      let pmf = &package.manifest_path;
      let src = pmf.parent().ok_or_else(
        || anyhow!("manifest path without directory: {:?}", &package)
      )?.to_str().ok_or_else(
        || anyhow!("manifest path cannot be converted to str {:?}", &package)
      )?;
      let outleaf = package_leafname(&package.name,
                                     &format!("{}",&package.version))?;
      let desc = Html(format!(
        "rust package {} version {}",
        Html::from_literal(&package.name).0,
        Html::from_literal(&format!("{}",package.version)).0));

      let cat = (self.config.cargo_source_cat)(&package.source)?;
      self.add_component(&DirectoryComponent::new(src.to_owned())?,
                         &outleaf, cat, desc)?;
    }
  }

  #[throws(E)]
  pub fn aggregate(mut self) -> Vec<OutputItem> {
    let mut categories : Vec<_> = self.items.keys().map(|c| *c).collect();
    categories.sort();
    categories.reverse();

    let mut results = vec![];
    {
      let include_upstream = self.config.include_upstream;
      let mut agg = |mincat, nomcat, outleaf, desc |{
        self.aggregate_min_category(&categories,&mut results,
                                    mincat,nomcat,outleaf,desc)
          .cxm(||format!("failure aggregating {}", outleaf))
      };
      agg(Mixed, Modified, "Modified",
          &HtmlRef("Sources, locally modified"))?;
      if include_upstream {
        agg(Upstream, Mixed, "Comprehensive",
            &HtmlRef("All sources"))?;
      }
    }

    let mut index = IndexWriter::new(&self.output_dir,
                                     &HtmlRef("Master index"))?;
    {
      let mut table = index.table(Html("Source code bundles".to_owned()))?;
      
      for &OutputItem { leafname : ref tarfile, ref desc, .. } in &results {
        table.entry(tarfile, &desc.as_ref())?;
      }
    }
    index.finish()?;

    if self.config.tidy {
      for OutputItem { ref leafname, .. } in self.items.values().flatten() {
        remove_file(self.output_file(leafname)).cxm(||format!(
          "failed to remove {:?} while tidying up", leafname
        ))?;
      }
    }

    (||{
      let mut f = File::create(self.output_file("index.json"))
        .context("create output file")?;
      writeln!(f,"{}",&serde_json::to_string(&results)?)?;
      f.flush()?;
      <Result<(),E>>::Ok(())
    })().context("create index.json file")?;

    let mut output = vec![ OutputItem{
      leafname : INDEX.to_owned(),
      cat : Metadata,
      desc : Html("index of output bundles".to_owned())
    }];
    output.extend(results);
    output
  }

  #[throws(E)]
  /// Builds one aggregate tarball, `OUTLEAFBASE.tar.gz`, containing an
  /// index plus every recorded item whose category is at least `mincat`.
  ///
  /// `cats` are the categories to go through, in index order.  The
  /// resulting tarball is pushed onto `results` with description `desc`
  /// and category `nomcat`.
  ///
  /// A symlink `OUTLEAFBASE -> .` exists in the output directory while
  /// tar runs, so that the archive members can be named
  /// `OUTLEAFBASE/LEAF`.
  fn aggregate_min_category(&mut self, cats : &Vec<Category>,
                            results : &mut Vec<OutputItem>,
                            mincat : Category, nomcat : Category,
                            outleafbase : &str, desc : &HtmlRef) {
    let tarfile = format!("{}.tar.gz", outleafbase);

    // Replace any stale link (left behind by an earlier failed run, say).
    let linkpath = self.output_file(&outleafbase);
    match remove_file(&linkpath) {
      Err(e) if e.kind() == NotFound => (),
      other => other?,
    }
    symlink(".", &linkpath)?;

    let mut index = IndexWriter::new(&self.output_dir,desc)?;
    let mut leaves = vec![ INDEX.to_owned() ];

    for &cat in cats {
      let heading = match cat {
        Metadata => HtmlRef("Metadata and index files"),
        Modified => HtmlRef("Local sources, possibly modified"),
        Upstream => HtmlRef("Upstream sources included for completeness"),
        Mixed    => HtmlRef("Mixed, other or unknown content"),
      };
      let mut table = index.table(heading.to_owned())?;
      let bucket = &self.items[&cat];
      for OutputItem { leafname, cat : item_cat, desc : item_desc } in bucket {
        if *item_cat < mincat { continue }
        leaves.push(leafname.to_owned());
        table.entry(&leafname, &item_desc.as_ref())?;
      }
    }
    index.finish()?;

    let members = leaves.into_iter().map(
      |leaf| outleafbase.to_owned() + "/" + &leaf
    );
    let mut cmd = tar_command();
    cmd.current_dir(&*self.output_dir);
    cmd.args(&["--use-compress-program=gzip --rsyncable",
               "--no-recursion",
               "-cf", &tarfile, "--"]);
    cmd.args(members);

    run_cmd(cmd).cxm(||format!("build aggregate (tar) {:?}", &desc))?;

    remove_file(linkpath)?;

    let item = OutputItem {
      leafname : tarfile,
      desc : desc.to_owned(),
      cat : nomcat,
    };
    results.push(item);
  }
}

#[cfg(test)] use tempfile::TempDir;

/// Bundles this crate's own cargo packages and aggregates them.
///
/// Output goes to `$BUNDLE_RUST_SOURCES_TEST_OUTPUT` if that is set,
/// and otherwise to a temporary directory.
#[test]
fn selftest() -> Result<(),E> {
  // Holds the temporary directory (if one is used) until the test ends.
  let mut _drop_temp = None;
  let output_dir = match env::var_os("BUNDLE_RUST_SOURCES_TEST_OUTPUT") {
    Some(given) => given,
    None => {
      let temp = TempDir::new()?;
      let path = temp.path().as_os_str().to_owned();
      _drop_temp = Some(temp);
      path
    }
  };
  let output_dir = output_dir
    .to_str()
    .ok_or_else(|| anyhow!("tempdir to_str!"))?;

  let mut sg = SourceGenerator::new_adding(output_dir.to_string())?;
  sg.add_cargo_packages(None)?;
  let _r = sg.aggregate()?;
  Ok(())
}