Skip to main content

rustpython_ruff_python_ast/
script.rs

1use std::sync::LazyLock;
2
3use memchr::memmem::Finder;
4
5static FINDER: LazyLock<Finder> = LazyLock::new(|| Finder::new(b"# /// script"));
6
/// A Python script split around its PEP 723 `script` metadata comment block.
///
/// See: <https://peps.python.org/pep-0723/>
///
/// Vendored from: <https://github.com/astral-sh/uv/blob/debe67ffdb0cd7835734100e909b2d8f79613743/crates/uv-scripts/src/lib.rs#L283>
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ScriptTag {
    /// Everything in the script that precedes the metadata block.
    prelude: String,
    /// The body of the metadata block itself.
    metadata: String,
    /// Everything in the script that follows the metadata block.
    postlude: String,
}
21
22impl ScriptTag {
23    /// Given the contents of a Python file, extract the `script` metadata block with leading
24    /// comment hashes removed, any preceding shebang or content (prelude), and the remaining Python
25    /// script.
26    ///
27    /// Given the following input string representing the contents of a Python script:
28    ///
29    /// ```python
30    /// #!/usr/bin/env python3
31    /// # /// script
32    /// # requires-python = '>=3.11'
33    /// # dependencies = [
34    /// #   'requests<3',
35    /// #   'rich',
36    /// # ]
37    /// # ///
38    ///
39    /// import requests
40    ///
41    /// print("Hello, World!")
42    /// ```
43    ///
44    /// This function would return:
45    ///
46    /// - Preamble: `#!/usr/bin/env python3\n`
47    /// - Metadata: `requires-python = '>=3.11'\ndependencies = [\n  'requests<3',\n  'rich',\n]`
48    /// - Postlude: `import requests\n\nprint("Hello, World!")\n`
49    ///
50    /// See: <https://peps.python.org/pep-0723/>
51    pub fn parse(contents: &[u8]) -> Option<Self> {
52        // Identify the opening pragma.
53        let index = FINDER.find(contents)?;
54
55        // The opening pragma must be the first line, or immediately preceded by a newline.
56        if !(index == 0 || matches!(contents[index - 1], b'\r' | b'\n')) {
57            return None;
58        }
59
60        // Extract the preceding content.
61        let prelude = std::str::from_utf8(&contents[..index]).ok()?;
62
63        // Decode as UTF-8.
64        let contents = &contents[index..];
65        let contents = std::str::from_utf8(contents).ok()?;
66
67        let mut lines = contents.lines();
68
69        // Ensure that the first line is exactly `# /// script`.
70        if lines.next().is_none_or(|line| line != "# /// script") {
71            return None;
72        }
73
74        // > Every line between these two lines (# /// TYPE and # ///) MUST be a comment starting
75        // > with #. If there are characters after the # then the first character MUST be a space. The
76        // > embedded content is formed by taking away the first two characters of each line if the
77        // > second character is a space, otherwise just the first character (which means the line
78        // > consists of only a single #).
79        let mut toml = vec![];
80
81        // Extract the content that follows the metadata block.
82        let mut python_script = vec![];
83
84        while let Some(line) = lines.next() {
85            // Remove the leading `#`.
86            let Some(line) = line.strip_prefix('#') else {
87                python_script.push(line);
88                python_script.extend(lines);
89                break;
90            };
91
92            // If the line is empty, continue.
93            if line.is_empty() {
94                toml.push("");
95                continue;
96            }
97
98            // Otherwise, the line _must_ start with ` `.
99            let Some(line) = line.strip_prefix(' ') else {
100                python_script.push(line);
101                python_script.extend(lines);
102                break;
103            };
104
105            toml.push(line);
106        }
107
108        // Find the closing `# ///`. The precedence is such that we need to identify the _last_ such
109        // line.
110        //
111        // For example, given:
112        // ```python
113        // # /// script
114        // #
115        // # ///
116        // #
117        // # ///
118        // ```
119        //
120        // The latter `///` is the closing pragma
121        let index = toml.iter().rev().position(|line| *line == "///")?;
122        let index = toml.len() - index;
123
124        // Discard any lines after the closing `# ///`.
125        //
126        // For example, given:
127        // ```python
128        // # /// script
129        // #
130        // # ///
131        // #
132        // #
133        // ```
134        //
135        // We need to discard the last two lines.
136        toml.truncate(index - 1);
137
138        // Join the lines into a single string.
139        let prelude = prelude.to_string();
140        let metadata = toml.join("\n") + "\n";
141        let postlude = python_script.join("\n") + "\n";
142
143        Some(Self {
144            prelude,
145            metadata,
146            postlude,
147        })
148    }
149}