Skip to main content

pro_core/script/
parser.rs

1//! PEP 723 inline script metadata parser
2//!
3//! Parses inline dependency specifications from Python scripts following
4//! PEP 723: https://peps.python.org/pep-0723/
5//!
6//! Example script metadata:
7//! ```python
8//! # /// script
9//! # requires-python = ">=3.11"
10//! # dependencies = [
11//! #   "requests",
12//! #   "rich>=10.0",
13//! # ]
14//! # ///
15//! ```
16
17use crate::{Error, Result};
18
/// Metadata declared in a script's PEP 723 `script` block.
///
/// A script without a metadata block is represented by
/// [`ScriptMetadata::default`] (no Python requirement, no dependencies).
///
/// `PartialEq`/`Eq` are derived so parsed metadata can be compared directly,
/// e.g. with `assert_eq!` or to detect whether a script's metadata changed.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ScriptMetadata {
    /// Value of `requires-python`, stored verbatim as written in the script.
    /// It is expected to be a PEP 440 specifier but is not validated here.
    pub requires_python: Option<String>,
    /// Entries of the `dependencies` array, in declaration order.
    pub dependencies: Vec<String>,
}
27
28impl ScriptMetadata {
29    /// Check if the script has any metadata
30    pub fn is_empty(&self) -> bool {
31        self.requires_python.is_none() && self.dependencies.is_empty()
32    }
33
34    /// Check if the script has dependencies
35    pub fn has_dependencies(&self) -> bool {
36        !self.dependencies.is_empty()
37    }
38
39    /// Generate a hash of the dependencies for caching
40    pub fn dependency_hash(&self) -> String {
41        use sha2::{Digest, Sha256};
42
43        let mut hasher = Sha256::new();
44
45        // Include requires-python in hash
46        if let Some(ref req) = self.requires_python {
47            hasher.update(req.as_bytes());
48        }
49
50        // Include sorted dependencies
51        let mut deps = self.dependencies.clone();
52        deps.sort();
53        for dep in deps {
54            hasher.update(dep.as_bytes());
55        }
56
57        let result = hasher.finalize();
58        hex::encode(&result[..8]) // Use first 8 bytes (16 hex chars)
59    }
60}
61
62/// Parse PEP 723 inline script metadata from script content
63///
64/// Looks for a block of the form:
65/// ```text
66/// # /// script
67/// # key = "value"
68/// # ///
69/// ```
70pub fn parse_script_metadata(content: &str) -> Result<ScriptMetadata> {
71    // Find the script metadata block
72    let block = extract_metadata_block(content)?;
73
74    if block.is_none() {
75        return Ok(ScriptMetadata::default());
76    }
77
78    let block = block.unwrap();
79
80    // Parse as TOML
81    let toml_content = block.join("\n");
82    parse_metadata_toml(&toml_content)
83}
84
85/// Extract the metadata block lines from script content
86fn extract_metadata_block(content: &str) -> Result<Option<Vec<String>>> {
87    let mut in_block = false;
88    let mut block_lines = Vec::new();
89
90    for line in content.lines() {
91        let trimmed = line.trim();
92
93        // Check for block start
94        if !in_block {
95            if trimmed == "# /// script" {
96                in_block = true;
97                continue;
98            }
99            // Also handle variations without space
100            if trimmed == "#/// script" {
101                in_block = true;
102                continue;
103            }
104        } else {
105            // Check for block end
106            if trimmed == "# ///" || trimmed == "#///" {
107                return Ok(Some(block_lines));
108            }
109
110            // Extract the content after "# "
111            if let Some(content) = trimmed.strip_prefix("# ") {
112                block_lines.push(content.to_string());
113            } else if let Some(content) = trimmed.strip_prefix("#") {
114                // Handle lines without space after #
115                block_lines.push(content.to_string());
116            } else if trimmed.is_empty() {
117                // Preserve empty lines within the block
118                block_lines.push(String::new());
119            } else {
120                // Non-comment line inside block is an error
121                return Err(Error::ScriptMetadataError(format!(
122                    "unexpected non-comment line in script block: {}",
123                    line
124                )));
125            }
126        }
127    }
128
129    // If we're still in the block, it wasn't closed
130    if in_block {
131        return Err(Error::ScriptMetadataError(
132            "script metadata block not closed (missing # ///)".into(),
133        ));
134    }
135
136    Ok(None)
137}
138
139/// Parse the extracted metadata as TOML
140fn parse_metadata_toml(content: &str) -> Result<ScriptMetadata> {
141    let table: toml::Table = toml::from_str(content).map_err(|e| {
142        Error::ScriptMetadataError(format!("invalid TOML in script metadata: {}", e))
143    })?;
144
145    let mut metadata = ScriptMetadata::default();
146
147    // Extract requires-python
148    if let Some(value) = table.get("requires-python") {
149        metadata.requires_python = value
150            .as_str()
151            .map(|s| s.to_string())
152            .ok_or_else(|| Error::ScriptMetadataError("requires-python must be a string".into()))?
153            .into();
154    }
155
156    // Extract dependencies
157    if let Some(value) = table.get("dependencies") {
158        let deps = value
159            .as_array()
160            .ok_or_else(|| Error::ScriptMetadataError("dependencies must be an array".into()))?;
161
162        for dep in deps {
163            let dep_str = dep
164                .as_str()
165                .ok_or_else(|| Error::ScriptMetadataError("dependency must be a string".into()))?;
166            metadata.dependencies.push(dep_str.to_string());
167        }
168    }
169
170    Ok(metadata)
171}
172
/// Cheap pre-check for the presence of a PEP 723 `script` block.
///
/// Only searches for the opening marker text anywhere in `content`; it does
/// not parse or validate the block, so a `true` result is merely a hint
/// that a full parse is worthwhile.
pub fn might_have_metadata(content: &str) -> bool {
    const MARKERS: [&str; 2] = ["# /// script", "#/// script"];
    MARKERS.iter().any(|&marker| content.contains(marker))
}
179
#[cfg(test)]
mod tests {
    use super::*;

    /// A block following a shebang line is found and both keys are read.
    #[test]
    fn test_parse_simple_metadata() {
        let content = r#"#!/usr/bin/env python
# /// script
# requires-python = ">=3.11"
# dependencies = ["requests"]
# ///

import requests
print("Hello")
"#;

        let metadata = parse_script_metadata(content).unwrap();
        assert_eq!(metadata.requires_python, Some(">=3.11".to_string()));
        assert_eq!(metadata.dependencies, vec!["requests"]);
    }

    /// A multi-line TOML array is reassembled and keeps declaration order.
    #[test]
    fn test_parse_multiple_dependencies() {
        let content = r#"# /// script
# requires-python = ">=3.10"
# dependencies = [
#   "requests>=2.28",
#   "rich",
#   "click>=8.0",
# ]
# ///
"#;

        let metadata = parse_script_metadata(content).unwrap();
        assert_eq!(metadata.requires_python, Some(">=3.10".to_string()));
        assert_eq!(
            metadata.dependencies,
            vec!["requests>=2.28", "rich", "click>=8.0"]
        );
    }

    /// A script without any block yields empty (default) metadata.
    #[test]
    fn test_no_metadata() {
        let content = r#"#!/usr/bin/env python
import sys
print(sys.version)
"#;

        let metadata = parse_script_metadata(content).unwrap();
        assert!(metadata.is_empty());
    }

    /// Code appearing before the closing marker is rejected.
    ///
    /// Note: this input fails on the non-comment line (`import requests`),
    /// not on reaching the end of input; see
    /// `test_unterminated_block_at_eof` for that path.
    #[test]
    fn test_unclosed_block() {
        let content = r#"# /// script
# dependencies = ["requests"]
import requests
"#;

        let result = parse_script_metadata(content);
        assert!(result.is_err());
    }

    /// Input that ends while still inside the block is rejected.
    #[test]
    fn test_unterminated_block_at_eof() {
        let content = "# /// script\n# dependencies = [\"requests\"]\n";

        let result = parse_script_metadata(content);
        assert!(result.is_err());
    }

    /// `requires-python` may be omitted.
    #[test]
    fn test_dependency_only() {
        let content = r#"# /// script
# dependencies = ["numpy", "pandas"]
# ///
import numpy
"#;

        let metadata = parse_script_metadata(content).unwrap();
        assert!(metadata.requires_python.is_none());
        assert_eq!(metadata.dependencies, vec!["numpy", "pandas"]);
    }

    /// Markers and content lines written without a space after `#` are accepted.
    #[test]
    fn test_compact_marker_variant() {
        let content = "#/// script\n#dependencies = [\"requests\"]\n#///\n";

        let metadata = parse_script_metadata(content).unwrap();
        assert!(metadata.requires_python.is_none());
        assert_eq!(metadata.dependencies, vec!["requests"]);
    }

    /// An opened-and-closed block with no content parses to empty metadata.
    #[test]
    fn test_empty_block() {
        let metadata = parse_script_metadata("# /// script\n# ///\n").unwrap();
        assert!(metadata.is_empty());
        assert!(!metadata.has_dependencies());
    }

    /// Syntactically broken TOML inside the block is reported as an error.
    #[test]
    fn test_invalid_toml() {
        let content = "# /// script\n# dependencies = [\n# ///\n";
        assert!(parse_script_metadata(content).is_err());
    }

    /// Keys with the wrong TOML type are rejected.
    #[test]
    fn test_invalid_field_types() {
        let deps_not_array = "# /// script\n# dependencies = \"requests\"\n# ///\n";
        assert!(parse_script_metadata(deps_not_array).is_err());

        let dep_not_string = "# /// script\n# dependencies = [1]\n# ///\n";
        assert!(parse_script_metadata(dep_not_string).is_err());

        let python_not_string = "# /// script\n# requires-python = 3\n# ///\n";
        assert!(parse_script_metadata(python_not_string).is_err());
    }

    /// The quick check matches both marker spellings and nothing else.
    #[test]
    fn test_might_have_metadata() {
        assert!(might_have_metadata("# /// script\n# ///"));
        assert!(might_have_metadata("#/// script"));
        assert!(!might_have_metadata("import sys"));
    }

    /// The hash ignores dependency order but reflects the dependency set.
    #[test]
    fn test_dependency_hash() {
        let meta1 = ScriptMetadata {
            requires_python: Some(">=3.11".to_string()),
            dependencies: vec!["requests".to_string(), "rich".to_string()],
        };

        let meta2 = ScriptMetadata {
            requires_python: Some(">=3.11".to_string()),
            dependencies: vec!["rich".to_string(), "requests".to_string()], // Different order
        };

        // Same deps (sorted) should produce same hash
        assert_eq!(meta1.dependency_hash(), meta2.dependency_hash());

        // Different deps should produce different hash
        let meta3 = ScriptMetadata {
            requires_python: Some(">=3.11".to_string()),
            dependencies: vec!["requests".to_string()],
        };
        assert_ne!(meta1.dependency_hash(), meta3.dependency_hash());

        // 8 digest bytes rendered as hex
        assert_eq!(meta1.dependency_hash().len(), 16);
    }
}
285}