cargo_eval/util/
script.rs

1use anyhow::Context as _;
2
3use crate::CargoResult;
4
/// Edition written to `package.edition` when the embedded manifest does not set one.
const DEFAULT_EDITION: &str = "2021";
/// Version written to `package.version` when the embedded manifest does not set one.
const DEFAULT_VERSION: &str = "0.0.0";
/// Value written to `package.publish` when the embedded manifest does not set one.
const DEFAULT_PUBLISH: bool = false;
8
/// A script file split into its embedded manifest and its source text.
#[derive(Debug, Clone)]
pub struct RawScript {
    /// Contents of the `cargo` fenced code block taken from the script's leading doc-comment;
    /// empty when no such block was found.
    manifest: String,
    /// The complete, unmodified text of the script.
    body: String,
    /// Path the script was parsed for; used for naming the package and locating the source.
    path: std::path::PathBuf,
}
14
15impl RawScript {
16    pub fn parse_from(path: &std::path::Path) -> CargoResult<Self> {
17        let body = std::fs::read_to_string(path)
18            .with_context(|| format!("failed to script at {}", path.display()))?;
19        Self::parse(&body, path)
20    }
21
22    pub fn parse(body: &str, path: &std::path::Path) -> CargoResult<Self> {
23        let comment = match extract_comment(body) {
24            Ok(manifest) => Some(manifest),
25            Err(err) => {
26                log::trace!("failed to extract doc comment: {err}");
27                None
28            }
29        }
30        .unwrap_or_default();
31        let manifest = match extract_manifest(&comment)? {
32            Some(manifest) => Some(manifest),
33            None => {
34                log::trace!("failed to extract manifest");
35                None
36            }
37        }
38        .unwrap_or_default();
39        let body = body.to_owned();
40        let path = path.to_owned();
41        Ok(Self {
42            manifest,
43            body,
44            path,
45        })
46    }
47
48    pub fn to_workspace<'cfg>(
49        &self,
50        config: &'cfg cargo::Config,
51    ) -> CargoResult<cargo::core::Workspace<'cfg>> {
52        let target_dir = config.target_dir().transpose().unwrap_or_else(|| {
53            crate::config::default_target_dir().map(cargo::util::Filesystem::new)
54        })?;
55        // HACK: without cargo knowing about embedded manifests, the only way to create a
56        // `Workspace` is either
57        // - Create a temporary one on disk
58        // - Create an "ephemeral" workspace **but** compilation re-loads ephemeral workspaces
59        //   from the registry rather than what we already have on memory, causing it to fail
60        //   because the registry doesn't know about embedded manifests.
61        let manifest_path = self.write(config, target_dir.as_path_unlocked())?;
62        let workspace = cargo::core::Workspace::new(&manifest_path, config)?;
63        Ok(workspace)
64    }
65
66    fn write(
67        &self,
68        config: &cargo::Config,
69        target_dir: &std::path::Path,
70    ) -> CargoResult<std::path::PathBuf> {
71        let hash = self.hash().to_string();
72        assert_eq!(hash.len(), 64);
73        let mut workspace_root = target_dir.to_owned();
74        workspace_root.push("eval");
75        workspace_root.push(&hash[0..2]);
76        workspace_root.push(&hash[2..4]);
77        workspace_root.push(&hash[4..]);
78        workspace_root.push(self.package_name()?);
79        std::fs::create_dir_all(&workspace_root).with_context(|| {
80            format!(
81                "failed to create temporary workspace at {}",
82                workspace_root.display()
83            )
84        })?;
85        let manifest_path = workspace_root.join("Cargo.toml");
86        let manifest = self
87            .expand_manifest_(config)
88            .with_context(|| format!("failed to parse manifest at {}", self.path.display()))?;
89        let manifest = remap_paths(
90            manifest,
91            self.path.parent().ok_or_else(|| {
92                anyhow::format_err!("no parent directory for {}", self.path.display())
93            })?,
94        )?;
95        let manifest = toml::to_string_pretty(&manifest)?;
96        crate::util::write_if_changed(&manifest_path, &manifest)?;
97        Ok(manifest_path)
98    }
99
100    pub fn expand_manifest(&self, config: &cargo::Config) -> CargoResult<String> {
101        let manifest = self
102            .expand_manifest_(config)
103            .with_context(|| format!("failed to parse manifest at {}", self.path.display()))?;
104        let manifest = toml::to_string_pretty(&manifest)?;
105        Ok(manifest)
106    }
107
108    fn expand_manifest_(&self, config: &cargo::Config) -> CargoResult<toml::Table> {
109        let mut manifest: toml::Table = toml::from_str(&self.manifest)?;
110
111        for key in ["workspace", "lib", "bin", "example", "test", "bench"] {
112            if manifest.contains_key(key) {
113                anyhow::bail!("`{key}` is not allowed in embedded manifests")
114            }
115        }
116
117        // Prevent looking for a workspace by `read_manifest_from_str`
118        manifest.insert("workspace".to_owned(), toml::Table::new().into());
119
120        let package = manifest
121            .entry("package".to_owned())
122            .or_insert_with(|| toml::Table::new().into())
123            .as_table_mut()
124            .ok_or_else(|| anyhow::format_err!("`package` must be a table"))?;
125        for key in ["workspace", "build", "links"] {
126            if package.contains_key(key) {
127                anyhow::bail!("`package.{key}` is not allowed in embedded manifests")
128            }
129        }
130        let name = self.package_name()?;
131        let hash = self.hash();
132        let bin_name = format!("{name}_{hash}");
133        package
134            .entry("name".to_owned())
135            .or_insert(toml::Value::String(name));
136        package
137            .entry("version".to_owned())
138            .or_insert_with(|| toml::Value::String(DEFAULT_VERSION.to_owned()));
139        package.entry("edition".to_owned()).or_insert_with(|| {
140            let _ = config.shell().warn(format_args!(
141                "`package.edition` is unspecifiead, defaulting to `{}`",
142                DEFAULT_EDITION
143            ));
144            toml::Value::String(DEFAULT_EDITION.to_owned())
145        });
146        package
147            .entry("publish".to_owned())
148            .or_insert_with(|| toml::Value::Boolean(DEFAULT_PUBLISH));
149
150        let mut bin = toml::Table::new();
151        bin.insert("name".to_owned(), toml::Value::String(bin_name));
152        bin.insert(
153            "path".to_owned(),
154            toml::Value::String(
155                self.path
156                    .to_str()
157                    .ok_or_else(|| anyhow::format_err!("path is not valid UTF-8"))?
158                    .into(),
159            ),
160        );
161        manifest.insert(
162            "bin".to_owned(),
163            toml::Value::Array(vec![toml::Value::Table(bin)]),
164        );
165
166        let release = manifest
167            .entry("profile".to_owned())
168            .or_insert_with(|| toml::Value::Table(Default::default()))
169            .as_table_mut()
170            .ok_or_else(|| anyhow::format_err!("`profile` must be a table"))?
171            .entry("release".to_owned())
172            .or_insert_with(|| toml::Value::Table(Default::default()))
173            .as_table_mut()
174            .ok_or_else(|| anyhow::format_err!("`profile.release` must be a table"))?;
175        release
176            .entry("strip".to_owned())
177            .or_insert_with(|| toml::Value::Boolean(true));
178
179        Ok(manifest)
180    }
181
182    fn package_name(&self) -> CargoResult<String> {
183        let name = self
184            .path
185            .file_stem()
186            .ok_or_else(|| anyhow::format_err!("no file name"))?
187            .to_string_lossy();
188        let mut slug = String::new();
189        for (i, c) in name.chars().enumerate() {
190            match (i, c) {
191                (0, '0'..='9') => {
192                    slug.push('_');
193                    slug.push(c);
194                }
195                (_, '0'..='9') | (_, 'a'..='z') | (_, '_') | (_, '-') => {
196                    slug.push(c);
197                }
198                (_, 'A'..='Z') => {
199                    // Convert uppercase characters to lowercase to avoid `non_snake_case` warnings.
200                    slug.push(c.to_ascii_lowercase());
201                }
202                (_, _) => {
203                    slug.push('_');
204                }
205            }
206        }
207        Ok(slug)
208    }
209
210    fn hash(&self) -> blake3::Hash {
211        blake3::hash(self.body.as_bytes())
212    }
213}
214
215/// Locates a "code block manifest" in Rust source.
216fn extract_comment(input: &str) -> CargoResult<String> {
217    let re_crate_comment = regex::Regex::new(
218        // We need to find the first `/*!` or `//!` that *isn't* preceded by something that would
219        // make it apply to anything other than the crate itself.  Because we can't do this
220        // accurately, we'll just require that the doc-comment is the *first* thing in the file
221        // (after the optional shebang).
222        r"(?x)(^\s*|^\#![^\[].*?(\r\n|\n))(/\*!|//(!|/))",
223    )
224    .unwrap();
225    let re_margin = regex::Regex::new(r"^\s*\*( |$)").unwrap();
226    let re_space = regex::Regex::new(r"^(\s+)").unwrap();
227    let re_nesting = regex::Regex::new(r"/\*|\*/").unwrap();
228    let re_comment = regex::Regex::new(r"^\s*//(!|/)").unwrap();
229
230    fn n_leading_spaces(s: &str, n: usize) -> anyhow::Result<()> {
231        if !s.chars().take(n).all(|c| c == ' ') {
232            anyhow::bail!("leading {n:?} chars aren't all spaces: {s:?}")
233        }
234        Ok(())
235    }
236
237    /// Returns a slice of the input string with the leading shebang, if there is one, omitted.
238    fn strip_shebang(s: &str) -> &str {
239        let re_shebang = regex::Regex::new(r"^#![^\[].*?(\r\n|\n)").unwrap();
240        re_shebang.find(s).map(|m| &s[m.end()..]).unwrap_or(s)
241    }
242
243    // First, we will look for and slice out a contiguous, inner doc-comment which must be *the
244    // very first thing* in the file.  `#[doc(...)]` attributes *are not supported*.  Multiple
245    // single-line comments cannot have any blank lines between them.
246    let input = strip_shebang(input); // `re_crate_comment` doesn't work with shebangs
247    let start = re_crate_comment
248        .captures(input)
249        .ok_or_else(|| anyhow::format_err!("no doc-comment found"))?
250        .get(3)
251        .ok_or_else(|| anyhow::format_err!("no doc-comment found"))?
252        .start();
253
254    let input = &input[start..];
255
256    if let Some(input) = input.strip_prefix("/*!") {
257        // On every line:
258        //
259        // - update nesting level and detect end-of-comment
260        // - if margin is None:
261        //     - if there appears to be a margin, set margin.
262        // - strip off margin marker
263        // - update the leading space counter
264        // - strip leading space
265        // - append content
266        let mut r = String::new();
267
268        let mut leading_space = None;
269        let mut margin = None;
270        let mut depth: u32 = 1;
271
272        for line in input.lines() {
273            if depth == 0 {
274                break;
275            }
276
277            // Update nesting and look for end-of-comment.
278            let mut end_of_comment = None;
279
280            for (end, marker) in re_nesting.find_iter(line).map(|m| (m.start(), m.as_str())) {
281                match (marker, depth) {
282                    ("/*", _) => depth += 1,
283                    ("*/", 1) => {
284                        end_of_comment = Some(end);
285                        depth = 0;
286                        break;
287                    }
288                    ("*/", _) => depth -= 1,
289                    _ => panic!("got a comment marker other than /* or */"),
290                }
291            }
292
293            let line = end_of_comment.map(|end| &line[..end]).unwrap_or(line);
294
295            // Detect and strip margin.
296            margin = margin.or_else(|| re_margin.find(line).map(|m| m.as_str()));
297
298            let line = if let Some(margin) = margin {
299                let end = line
300                    .char_indices()
301                    .take(margin.len())
302                    .map(|(i, c)| i + c.len_utf8())
303                    .last()
304                    .unwrap_or(0);
305                &line[end..]
306            } else {
307                line
308            };
309
310            // Detect and strip leading indentation.
311            leading_space = leading_space.or_else(|| re_space.find(line).map(|m| m.end()));
312
313            // Make sure we have only leading spaces.
314            //
315            // If we see a tab, fall over.  I *would* expand them, but that gets into the question of how *many* spaces to expand them to, and *where* is the tab, because tabs are tab stops and not just N spaces.
316            n_leading_spaces(line, leading_space.unwrap_or(0))?;
317
318            let strip_len = line.len().min(leading_space.unwrap_or(0));
319            let line = &line[strip_len..];
320
321            // Done.
322            r.push_str(line);
323
324            // `lines` removes newlines.  Ideally, it wouldn't do that, but hopefully this shouldn't cause any *real* problems.
325            r.push('\n');
326        }
327
328        Ok(r)
329    } else if input.starts_with("//!") || input.starts_with("///") {
330        let mut r = String::new();
331
332        let mut leading_space = None;
333
334        for line in input.lines() {
335            // Strip leading comment marker.
336            let content = match re_comment.find(line) {
337                Some(m) => &line[m.end()..],
338                None => break,
339            };
340
341            // Detect and strip leading indentation.
342            leading_space = leading_space.or_else(|| {
343                re_space
344                    .captures(content)
345                    .and_then(|c| c.get(1))
346                    .map(|m| m.end())
347            });
348
349            // Make sure we have only leading spaces.
350            //
351            // If we see a tab, fall over.  I *would* expand them, but that gets into the question of how *many* spaces to expand them to, and *where* is the tab, because tabs are tab stops and not just N spaces.
352            n_leading_spaces(content, leading_space.unwrap_or(0))?;
353
354            let strip_len = content.len().min(leading_space.unwrap_or(0));
355            let content = &content[strip_len..];
356
357            // Done.
358            r.push_str(content);
359
360            // `lines` removes newlines.  Ideally, it wouldn't do that, but hopefully this shouldn't cause any *real* problems.
361            r.push('\n');
362        }
363
364        Ok(r)
365    } else {
366        Err(anyhow::format_err!("no doc-comment found"))
367    }
368}
369
370/// Extracts the first `Cargo` fenced code block from a chunk of Markdown.
371fn extract_manifest(comment: &str) -> CargoResult<Option<String>> {
372    use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag};
373
374    // To match librustdoc/html/markdown.rs, opts.
375    let exts = Options::ENABLE_TABLES | Options::ENABLE_FOOTNOTES;
376
377    let md = Parser::new_ext(comment, exts);
378
379    let mut inside = false;
380    let mut output = None;
381
382    for item in md {
383        match item {
384            Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(ref info)))
385                if info.to_lowercase() == "cargo" =>
386            {
387                if output.is_some() {
388                    anyhow::bail!("multiple `cargo` manifests present")
389                } else {
390                    output = Some(String::new());
391                }
392                inside = true;
393            }
394            Event::Text(ref text) if inside => {
395                let s = output.get_or_insert(String::new());
396                s.push_str(text);
397            }
398            Event::End(Tag::CodeBlock(_)) if inside => {
399                inside = false;
400            }
401            _ => (),
402        }
403    }
404
405    Ok(output)
406}
407
#[cfg(test)]
mod test_expand {
    use super::*;

    /// Parses `source` as if it were `/home/me/test.rs` and returns its expanded manifest,
    /// panicking with the error's display text on failure.
    fn expand(source: &str) -> String {
        let script = RawScript::parse(source, std::path::Path::new("/home/me/test.rs"))
            .unwrap_or_else(|err| panic!("{}", err));
        let config = cargo::util::Config::default().unwrap();
        script
            .expand_manifest(&config)
            .unwrap_or_else(|err| panic!("{}", err))
    }

    /// A script without any manifest gets every default filled in.
    #[test]
    fn test_default() {
        let expected = r#"[[bin]]
name = "test_a472c7a31645d310613df407eab80844346938a3b8fe4f392cae059cb181aa85"
path = "/home/me/test.rs"

[package]
edition = "2021"
name = "test"
publish = false
version = "0.0.0"

[profile.release]
strip = true

[workspace]
"#;
        let actual = expand(r#"fn main() {}"#);
        snapbox::assert_eq(expected, actual);
    }

    /// Dependencies declared in the embedded manifest survive expansion.
    #[test]
    fn test_dependencies() {
        let expected = r#"[[bin]]
name = "test_3a1fa07700654ea2e893f70bb422efa7884eb1021ccacabc5466efe545da8a0b"
path = "/home/me/test.rs"

[dependencies]
time = "0.1.25"

[package]
edition = "2021"
name = "test"
publish = false
version = "0.0.0"

[profile.release]
strip = true

[workspace]
"#;
        let actual = expand(
            r#"
//! ```cargo
//! [dependencies]
//! time="0.1.25"
//! ```
fn main() {}
"#,
        );
        snapbox::assert_eq(expected, actual);
    }
}
474
#[cfg(test)]
mod test_comment {
    use super::*;

    /// Text expected from the `//!` line-comment fixtures.
    const LINE_COMMENT_TEXT: &str = r#"Here is a manifest:

```cargo
[dependencies]
time = "*"
```
"#;

    /// Text expected from the `/*! ... */` block-comment fixtures; the opening and closing
    /// comment lines each contribute an empty line.
    const BLOCK_COMMENT_TEXT: &str = r#"
Here is a manifest:

```cargo
[dependencies]
time = "*"
```

"#;

    /// Extracts the doc-comment from `source`, panicking with the error text on failure.
    fn comment_of(source: &str) -> String {
        extract_comment(source).unwrap_or_else(|err| panic!("{}", err))
    }

    /// Returns the error message produced when extraction from `source` fails.
    fn failure_of(source: &str) -> String {
        extract_comment(source).unwrap_err().to_string()
    }

    #[test]
    fn test_no_comment() {
        let message = failure_of(
            r#"
fn main () {
}
"#,
        );
        snapbox::assert_eq("no doc-comment found", message);
    }

    #[test]
    fn test_no_comment_she_bang() {
        let message = failure_of(
            r#"#!/usr/bin/env cargo-eval

fn main () {
}
"#,
        );
        snapbox::assert_eq("no doc-comment found", message);
    }

    #[test]
    fn test_comment() {
        let actual = comment_of(
            r#"//! Here is a manifest:
//!
//! ```cargo
//! [dependencies]
//! time = "*"
//! ```
fn main() {}
"#,
        );
        snapbox::assert_eq(LINE_COMMENT_TEXT, actual);
    }

    #[test]
    fn test_comment_shebang() {
        let actual = comment_of(
            r#"#!/usr/bin/env cargo-eval

//! Here is a manifest:
//!
//! ```cargo
//! [dependencies]
//! time = "*"
//! ```
fn main() {}
"#,
        );
        snapbox::assert_eq(LINE_COMMENT_TEXT, actual);
    }

    #[test]
    fn test_multiline_comment() {
        let actual = comment_of(
            r#"/*!
Here is a manifest:

```cargo
[dependencies]
time = "*"
```
*/

fn main() {
}
"#,
        );
        snapbox::assert_eq(BLOCK_COMMENT_TEXT, actual);
    }

    #[test]
    fn test_multiline_comment_shebang() {
        let actual = comment_of(
            r#"#!/usr/bin/env cargo-eval

/*!
Here is a manifest:

```cargo
[dependencies]
time = "*"
```
*/

fn main() {
}
"#,
        );
        snapbox::assert_eq(BLOCK_COMMENT_TEXT, actual);
    }

    #[test]
    fn test_multiline_block_comment() {
        let actual = comment_of(
            r#"/*!
 * Here is a manifest:
 *
 * ```cargo
 * [dependencies]
 * time = "*"
 * ```
 */
fn main() {}
"#,
        );
        snapbox::assert_eq(BLOCK_COMMENT_TEXT, actual);
    }

    #[test]
    fn test_multiline_block_comment_shebang() {
        let actual = comment_of(
            r#"#!/usr/bin/env cargo-eval

/*!
 * Here is a manifest:
 *
 * ```cargo
 * [dependencies]
 * time = "*"
 * ```
 */
fn main() {}
"#,
        );
        snapbox::assert_eq(BLOCK_COMMENT_TEXT, actual);
    }
}
668
669/// Given a Cargo manifest, attempts to rewrite relative file paths to absolute ones, allowing the manifest to be relocated.
670fn remap_paths(
671    mani: toml::Table,
672    package_root: &std::path::Path,
673) -> anyhow::Result<toml::value::Table> {
674    // Values that need to be rewritten:
675    let paths: &[&[&str]] = &[
676        &["build-dependencies", "*", "path"],
677        &["dependencies", "*", "path"],
678        &["dev-dependencies", "*", "path"],
679        &["package", "build"],
680        &["target", "*", "dependencies", "*", "path"],
681    ];
682
683    let mut mani = toml::Value::Table(mani);
684
685    for path in paths {
686        iterate_toml_mut_path(&mut mani, path, &mut |v| {
687            if let toml::Value::String(s) = v {
688                if std::path::Path::new(s).is_relative() {
689                    let p = package_root.join(&*s);
690                    if let Some(p) = p.to_str() {
691                        *s = p.into()
692                    }
693                }
694            }
695            Ok(())
696        })?
697    }
698
699    match mani {
700        toml::Value::Table(mani) => Ok(mani),
701        _ => unreachable!(),
702    }
703}
704
705/// Iterates over the specified TOML values via a path specification.
706fn iterate_toml_mut_path<F>(
707    base: &mut toml::Value,
708    path: &[&str],
709    on_each: &mut F,
710) -> anyhow::Result<()>
711where
712    F: FnMut(&mut toml::Value) -> anyhow::Result<()>,
713{
714    if path.is_empty() {
715        return on_each(base);
716    }
717
718    let cur = path[0];
719    let tail = &path[1..];
720
721    if cur == "*" {
722        if let toml::Value::Table(tab) = base {
723            for (_, v) in tab {
724                iterate_toml_mut_path(v, tail, on_each)?;
725            }
726        }
727    } else if let toml::Value::Table(tab) = base {
728        if let Some(v) = tab.get_mut(cur) {
729            iterate_toml_mut_path(v, tail, on_each)?;
730        }
731    }
732
733    Ok(())
734}
735
#[cfg(test)]
mod test_manifest {
    use super::*;

    /// Runs `extract_manifest` on `comment`, panicking if it reports an error.
    fn manifest_in(comment: &str) -> Option<String> {
        extract_manifest(comment).unwrap()
    }

    #[test]
    fn test_no_code_fence() {
        let found = manifest_in(
            r#"There is no manifest in this comment.
"#,
        );
        assert_eq!(found, None);
    }

    #[test]
    fn test_no_cargo_code_fence() {
        let found = manifest_in(
            r#"There is no manifest in this comment.

```
This is not a manifest.
```

```rust
println!("Nor is this.");
```

    Or this.
"#,
        );
        assert_eq!(found, None);
    }

    #[test]
    fn test_cargo_code_fence() {
        let found = manifest_in(
            r#"This is a manifest:

```cargo
dependencies = { time = "*" }
```
"#,
        );
        let expected: String = r#"dependencies = { time = "*" }
"#
        .into();
        assert_eq!(found, Some(expected));
    }

    #[test]
    fn test_mixed_code_fence() {
        let found = manifest_in(
            r#"This is *not* a manifest:

```
He's lying, I'm *totally* a manifest!
```

This *is*:

```cargo
dependencies = { time = "*" }
```
"#,
        );
        let expected: String = r#"dependencies = { time = "*" }
"#
        .into();
        assert_eq!(found, Some(expected));
    }

    #[test]
    fn test_two_cargo_code_fence() {
        let outcome = extract_manifest(
            r#"This is a manifest:

```cargo
dependencies = { time = "*" }
```

So is this, but it doesn't count:

```cargo
dependencies = { explode = true }
```
"#,
        );
        assert!(outcome.is_err());
    }
}