1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
//! Repackage `.crate` files under a different crate name.
//!
//! This crate provides [`repackage::dot_crate`](dot_crate), which repackages a `.crate` file so
//! that it exports the same crate under a different name. It replaces the `name` attribute in
//! `Cargo.toml`, and also rewrites references to the old name in the various `.rs` files that live
//! outside of `src/` (those in `src/` use `crate::`).
//!
//! # Rewriting .rs files
//!
//! Normally, rewriting the `name` in `Cargo.toml` should be sufficient for _most_ use-cases.
//! Consumers of a `.crate` file likely only care about the exported library, which only ever
//! refers to itself using paths starting with `crate::` or `::`, not including the name. Tests and
//! binaries do have to name the library crate, but are usually not used by downstream consumers of
//! the `.crate`. But, _just_ in case, this crate tries to modify those files as well using some
//! simple string replacement. It's brittle though, so you might only get so far with that approach
//! if you make heavy use of non-library artifacts in the produced `.crate` files.
#![warn(missing_docs, broken_intra_doc_links)]

use anyhow::Context;
use cargo_toml::Manifest;
use std::io::prelude::*;
use std::path::{Path, PathBuf};

/// Repackage the crate contained in the `.crate` tarball at `dot_crate` as `new_name`.
///
/// Pass in the old crate name to verify that the crate you are repackaging is in fact the one you
/// think it is. If you do not, it will be inferred from the name of the `.crate` file. The old
/// name is needed one way or the other in order to also replace references to the crate inside
/// non-`src/` .rs files.
///
/// The repackaged file will end up next to the current `.crate` file with the crate name replaced
/// appropriately. In other words, if you are replacing `foo` with `bar`, and give the input file
/// `baz/foo-0.1.0.crate`, the repackaged crate file will be `baz/bar-0.1.0.crate`.
pub fn dot_crate(
    dot_crate: impl AsRef<Path>,
    old_name: Option<&str>,
    new_name: &str,
) -> anyhow::Result<()> {
    let dot_crate = dot_crate.as_ref();

    // We want to use the same file path, but with the crate name replaced.
    // To do that we first need to extract the file name portion of the .crate path:
    let old_fn = dot_crate
        .file_name()
        .ok_or_else(|| anyhow::anyhow!(".crate file path '{}' is not a file", dot_crate.display()))?
        .to_str()
        .ok_or_else(|| {
            anyhow::anyhow!(
                ".crate file path '{}' is not valid utf-8",
                dot_crate.display()
            )
        })?;

    // Next, we verify that the .crate file is actually for the crate the user wanted to replace.
    // Otherwise, we might be repackaging some entirely different crate. Now, that will also be
    // caught once we get to the Cargo.toml file and verify its name, but if we can catch a mistake
    // sooner, that's better.
    //
    // Let's also handle the somewhat unlikely (but possible) prefix problem: Imagine someone wants
    // to rewrite the net crate to net2, but then pass us the crate file:
    //
    //     netscape-0.1.0.crate
    //
    // Sure, it _starts_ with net, but it's probably not the .crate file they intended to pass us.
    //
    // The trick is to look for the first . (which cannot appear in crate names), and walk
    // _backwards_ from there.
    let mut prefix = None;
    if let Some(dot) = old_fn.find('.') {
        if let Some(dash) = old_fn[..dot].rfind('-') {
            let name = &old_fn[..dash];
            let major = &old_fn[(dash + 1)..dot];
            if !name.is_empty() && !major.is_empty() && major.chars().all(|c| c.is_ascii_digit()) {
                prefix = Some(name);
            }
        }
    }
    if old_name.is_some() && prefix != old_name {
        anyhow::bail!(
            ".crate file '{}' does not match given old name '{}'",
            dot_crate.display(),
            old_name
                .as_ref()
                .expect("check for is_some in if conditional"),
        );
    }
    let old_name = old_name
        .or(prefix)
        .ok_or_else(|| anyhow::anyhow!("failed to infer current crate name"))?;

    let repackaged_fn = old_fn.replace(old_name, new_name);
    let repackaged_path = dot_crate.with_file_name(repackaged_fn);
    let repackaged = std::fs::File::create(&repackaged_path)?;
    let dot_crate_path = dot_crate;
    let dot_crate = std::fs::File::open(&dot_crate_path)?;

    // https://github.com/rust-lang/cargo/blob/8e075c9cab41eb1ed6222f819924999476477f2e/src/cargo/ops/cargo_package.rs#L481
    let dot_crate = flate2::read::GzDecoder::new(dot_crate);
    let mut dot_crate = tar::Archive::new(dot_crate);
    let repackaged = flate2::GzBuilder::new().write(repackaged, flate2::Compression::best());
    let mut repackaged = tar::Builder::new(repackaged);

    // We've got to be a little careful with replacements in .rs files.
    //
    // Imagine that a crate is called toml, and there's a struct field in the program called toml.
    // We obviously don't want to replace that, as it may be referenced elsewhere (might even be a
    // public field!). The same concern applies to both prefixes and suffixes.
    //
    // Luckily, crate names should only really show up in paths. That is, as crate_name::.
    // Teeechnically it can also show up as "use crate_name;" or "extern crate crate_name;", or
    // _even_ "extern crate crate_name as foobar;", but we're going to ignore those here since they
    // first two are trivial to fix in the code, and the last will break our renaming anyway.
    //
    // And for good measure, we also need to make sure the path is preceeded by a space, otherwise
    // our `toml` example would also rewrite
    //
    //     use foo_toml::bar;
    //
    // and
    //
    //     use foo::toml::bar;
    //
    // which we don't want. This will also still work in cases like toml::some_func(a).
    //
    // Unfortunately, it will _not_ work for anyone who tries to be fancy, such as by using tabs
    // over spaces, invoking macros by their full path at the top level of the file, or providing
    // full paths to functions and types as the first argument to a function
    // (`.any(toml::is_foo)`). Those _should_ be rare though, and keep in mind this does not apply
    // for files in `src/`, so let's consider it good enough until someone complains.
    let from = format!(" {}::", old_name.replace('-', "_"));
    let to = format!(" {}::", new_name.replace('-', "_"));

    // We also need to modify all paths inside the archive to start at new-name-0.1.0/ rather than
    // old-name-0.1.0/. This is simple enough as we're replacing the path wholesale.
    let old_base_dir = {
        let mut d = PathBuf::from(old_fn);
        d.set_extension("");
        d
    };
    let new_base_dir = {
        let mut d = PathBuf::from(old_fn.replace(old_name, new_name));
        d.set_extension("");
        d
    };

    let mut got_cargo_toml = false;
    let mut file_bytes = String::new();
    for file in dot_crate
        .entries()
        .context("walk entries from .crate file")?
    {
        let mut file = file.context("walk entry from .crate file")?;
        let mut header = file.header().clone();
        let path = file.path().context("get .crate file entry path")?;
        let sub_path = path.strip_prefix(&old_base_dir).map_err(|_| {
            anyhow::anyhow!(
                ".crate contained entry not under old crate subdir: {}",
                path.display()
            )
        })?;
        let path = new_base_dir.join(sub_path);

        if path.file_name() == Some(std::ffi::OsStr::new("Cargo.toml")) {
            // To avoid reading into memory we need:
            // https://github.com/alexcrichton/toml-rs/issues/215
            let mut toml_bytes = Vec::with_capacity(file.size() as usize);
            file.read_to_end(&mut toml_bytes)
                .context("read Cargo.toml from .crate file")?;
            let mut manifest =
                Manifest::from_slice(&toml_bytes).context("parse Cargo.toml from .crate file")?;
            if manifest.workspace.is_some() {
                anyhow::bail!(".crate file is a workspace, so is not packaged");
            }
            let p = manifest.package.as_mut().ok_or_else(|| {
                anyhow::anyhow!("Cargo.toml in .crate file does not contain a package")
            })?;
            if &p.name != old_name {
                anyhow::bail!(
                    "crate name in .crate ('{}') file did not match given name ('{}')",
                    p.name,
                    old_name
                );
            }
            p.name = new_name.to_string();

            // Work around https://gitlab.com/crates.rs/cargo_toml/-/issues/3
            // See https://github.com/alexcrichton/toml-rs/issues/142#issuecomment-278970591
            let manifest =
                toml::Value::try_from(&manifest).context("serialize modified Cargo.toml")?;
            let bytes = toml::to_vec(&manifest).context("serialize modified Cargo.toml")?;
            let mut bytes = &bytes[..]; // to give us io::Read
            header.set_size(bytes.len() as u64);
            header.set_cksum();
            repackaged
                .append_data(&mut header, path, &mut bytes)
                .context("append modified Cargo.toml to new .crate file")?;

            got_cargo_toml = true;
        } else if !path.starts_with("src") && path.extension().map(|e| e == "rs").unwrap_or(false) {
            // Replace previous_crate_name with new_crate_name.
            //
            // Binaries, tests, etc. will contain previous_crate_name:: paths, which we need to
            // re-write so that they still work after we change the top-level crate name. We
            // _could_ try to inject `extern crate previous_crate_name as new_crate_name`, but it
            // gets tricky as those can only be injected at the top-level crate entry point (and
            // only after //!, #!, /*!, etc.), so just straight up replacing is easier.
            //
            // Now, _technically_ this replacement shouldn't matter, since we're modifying a
            // package in a `.crate`, so any consumers should only be using the `lib` of the
            // current package anyway. And `lib` lives in `src/` (let's go ahead and assume they
            // haven't changed that) and refers to the current crate using `::` or `crate::`,
            // neither of which contain the current crate's name.
            //
            // But, we go for best effort anyway.

            // It would be nice if we could do the rewrite in a streaming fashion.
            // Unfortunately, doing so is tricky for two main reasons:
            //
            //  1) replacing the crate name changes the file size. We have to declare the size in
            //     the header, but we don't know how many replacements we're going to do until
            //     we've passed over the data!
            //  2) the crate name may appear at a chunk boundary.
            //
            // So, we just read the file into memory and then do the replacement(s) there.
            file_bytes.clear();
            file.read_to_string(&mut file_bytes)
                .context("read .rs file for in-place modification")?;

            let file_bytes = if file_bytes.contains(&from) {
                std::borrow::Cow::Owned(file_bytes.replace(&from, &to))
            } else {
                std::borrow::Cow::Borrowed(&file_bytes)
            };

            header.set_size(file_bytes.bytes().len() as u64);
            repackaged.append_data(&mut header, path, &mut file_bytes.as_bytes())?;
        } else {
            repackaged
                .append_data(&mut header, path, file)
                .context("append unmodified file to new .crate file")?;
        }
    }

    if !got_cargo_toml {
        let _ = std::fs::remove_file(repackaged_path);
        anyhow::bail!(
            ".crate file {} did not contain a Cargo.toml file",
            dot_crate_path.display()
        );
    }

    Ok(())
}