rustpython_ruff_python_ast/script.rs
1use std::sync::LazyLock;
2
3use memchr::memmem::Finder;
4
5static FINDER: LazyLock<Finder> = LazyLock::new(|| Finder::new(b"# /// script"));
6
/// The pieces of a Python script that embeds PEP 723 metadata in a `script` comment block.
///
/// See: <https://peps.python.org/pep-0723/>
///
/// Vendored from: <https://github.com/astral-sh/uv/blob/debe67ffdb0cd7835734100e909b2d8f79613743/crates/uv-scripts/src/lib.rs#L283>
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ScriptTag {
    /// Everything in the script that comes before the metadata block.
    prelude: String,
    /// The contents of the metadata block itself.
    metadata: String,
    /// Everything in the script that comes after the metadata block.
    postlude: String,
}
21
22impl ScriptTag {
23 /// Given the contents of a Python file, extract the `script` metadata block with leading
24 /// comment hashes removed, any preceding shebang or content (prelude), and the remaining Python
25 /// script.
26 ///
27 /// Given the following input string representing the contents of a Python script:
28 ///
29 /// ```python
30 /// #!/usr/bin/env python3
31 /// # /// script
32 /// # requires-python = '>=3.11'
33 /// # dependencies = [
34 /// # 'requests<3',
35 /// # 'rich',
36 /// # ]
37 /// # ///
38 ///
39 /// import requests
40 ///
41 /// print("Hello, World!")
42 /// ```
43 ///
44 /// This function would return:
45 ///
46 /// - Preamble: `#!/usr/bin/env python3\n`
47 /// - Metadata: `requires-python = '>=3.11'\ndependencies = [\n 'requests<3',\n 'rich',\n]`
48 /// - Postlude: `import requests\n\nprint("Hello, World!")\n`
49 ///
50 /// See: <https://peps.python.org/pep-0723/>
51 pub fn parse(contents: &[u8]) -> Option<Self> {
52 // Identify the opening pragma.
53 let index = FINDER.find(contents)?;
54
55 // The opening pragma must be the first line, or immediately preceded by a newline.
56 if !(index == 0 || matches!(contents[index - 1], b'\r' | b'\n')) {
57 return None;
58 }
59
60 // Extract the preceding content.
61 let prelude = std::str::from_utf8(&contents[..index]).ok()?;
62
63 // Decode as UTF-8.
64 let contents = &contents[index..];
65 let contents = std::str::from_utf8(contents).ok()?;
66
67 let mut lines = contents.lines();
68
69 // Ensure that the first line is exactly `# /// script`.
70 if lines.next().is_none_or(|line| line != "# /// script") {
71 return None;
72 }
73
74 // > Every line between these two lines (# /// TYPE and # ///) MUST be a comment starting
75 // > with #. If there are characters after the # then the first character MUST be a space. The
76 // > embedded content is formed by taking away the first two characters of each line if the
77 // > second character is a space, otherwise just the first character (which means the line
78 // > consists of only a single #).
79 let mut toml = vec![];
80
81 // Extract the content that follows the metadata block.
82 let mut python_script = vec![];
83
84 while let Some(line) = lines.next() {
85 // Remove the leading `#`.
86 let Some(line) = line.strip_prefix('#') else {
87 python_script.push(line);
88 python_script.extend(lines);
89 break;
90 };
91
92 // If the line is empty, continue.
93 if line.is_empty() {
94 toml.push("");
95 continue;
96 }
97
98 // Otherwise, the line _must_ start with ` `.
99 let Some(line) = line.strip_prefix(' ') else {
100 python_script.push(line);
101 python_script.extend(lines);
102 break;
103 };
104
105 toml.push(line);
106 }
107
108 // Find the closing `# ///`. The precedence is such that we need to identify the _last_ such
109 // line.
110 //
111 // For example, given:
112 // ```python
113 // # /// script
114 // #
115 // # ///
116 // #
117 // # ///
118 // ```
119 //
120 // The latter `///` is the closing pragma
121 let index = toml.iter().rev().position(|line| *line == "///")?;
122 let index = toml.len() - index;
123
124 // Discard any lines after the closing `# ///`.
125 //
126 // For example, given:
127 // ```python
128 // # /// script
129 // #
130 // # ///
131 // #
132 // #
133 // ```
134 //
135 // We need to discard the last two lines.
136 toml.truncate(index - 1);
137
138 // Join the lines into a single string.
139 let prelude = prelude.to_string();
140 let metadata = toml.join("\n") + "\n";
141 let postlude = python_script.join("\n") + "\n";
142
143 Some(Self {
144 prelude,
145 metadata,
146 postlude,
147 })
148 }
149}