Skip to main content

normalize_manifest/
setup_py.rs

//! Heuristic parser for `setup.py` files (Python/setuptools).
//!
//! Performs regex-free text extraction of `setup()` keyword arguments without
//! executing the file. For eval-backed parsing (which actually runs Python),
//! see `src/eval.rs`.
6
7use crate::pip::parse_pip_requirement;
8use crate::{DeclaredDep, DepKind, ManifestError, ManifestParser, ParsedManifest};
9
10/// Heuristic parser for `setup.py` files.
11pub struct SetupPyParser;
12
13impl ManifestParser for SetupPyParser {
14    fn filename(&self) -> &'static str {
15        "setup.py"
16    }
17
18    fn parse(&self, content: &str) -> Result<ParsedManifest, ManifestError> {
19        let name = extract_str_kwarg(content, "name");
20        let version = extract_str_kwarg(content, "version");
21
22        let mut deps = Vec::new();
23
24        // install_requires=[...] — Normal
25        if let Some(list) = extract_list(content, "install_requires") {
26            for item in parse_string_list(&list) {
27                if let Some(mut dep) = parse_pip_requirement(&item) {
28                    dep.kind = DepKind::Normal;
29                    deps.push(dep);
30                }
31            }
32        }
33
34        // tests_require=[...] — Dev
35        if let Some(list) = extract_list(content, "tests_require") {
36            for item in parse_string_list(&list) {
37                if let Some(mut dep) = parse_pip_requirement(&item) {
38                    dep.kind = DepKind::Dev;
39                    deps.push(dep);
40                }
41            }
42        }
43
44        // extras_require={...} — Dev for dev/test/testing/tests groups, Optional otherwise
45        if let Some(block) = extract_braces(content, "extras_require") {
46            deps.extend(parse_extras_require(&block));
47        }
48
49        Ok(ParsedManifest {
50            ecosystem: "python",
51            name,
52            version,
53            dependencies: deps,
54        })
55    }
56}
57
58// ── Extraction helpers ────────────────────────────────────────────────────────
59
60/// Extract the value of a `key="..."` or `key='...'` keyword argument from
61/// the setup() call. Returns the unquoted string.
62fn extract_str_kwarg(content: &str, key: &str) -> Option<String> {
63    // Match `key=` followed by optional whitespace then a quoted string.
64    let search = format!("{key}=");
65    let pos = content.find(&search)?;
66    let after = content[pos + search.len()..].trim_start();
67    extract_quoted_string(after)
68}
69
70/// Extract the content of a `[...]` bracket block following `key=[`.
71/// Handles multiline lists and nested brackets.
72fn extract_list(content: &str, key: &str) -> Option<String> {
73    let search = format!("{key}=[");
74    let pos = content.find(&search)?;
75    let after = &content[pos + search.len()..];
76    Some(collect_until_close(after, '[', ']'))
77}
78
79/// Extract the content of a `{...}` brace block following `key={`.
80fn extract_braces(content: &str, key: &str) -> Option<String> {
81    let search = format!("{key}={{");
82    let pos = content.find(&search)?;
83    let after = &content[pos + search.len()..];
84    Some(collect_until_close(after, '{', '}'))
85}
86
/// Collect characters until the matching close delimiter, tracking depth.
/// The opening delimiter has already been consumed; `after` starts after it.
///
/// Nested `open`/`close` pairs are kept in the result. When the matching
/// `close` is never found, the whole of `after` is returned.
fn collect_until_close(after: &str, open: char, close: char) -> String {
    // Depth starts at 1 for the opener the caller already consumed.
    let mut depth = 1usize;
    for (idx, ch) in after.char_indices() {
        if ch == open {
            depth += 1;
        } else if ch == close {
            depth -= 1;
            if depth == 0 {
                // Everything before the matching closer, copied in one go.
                return after[..idx].to_string();
            }
        }
    }
    after.to_string()
}
108
/// Extract a quoted string (single or double quotes) from the start of `s`.
///
/// Returns `None` when `s` does not begin with a quote character or when the
/// literal is never closed. A backslash escapes the character after it, so an
/// escaped quote does not end the literal; `\"`, `\'` and `\\` are unescaped
/// in the result, any other escape sequence is kept verbatim.
fn extract_quoted_string(s: &str) -> Option<String> {
    let mut chars = s.chars();
    let quote = chars.next().filter(|c| matches!(c, '"' | '\''))?;

    let mut value = String::new();
    while let Some(ch) = chars.next() {
        match ch {
            // A trailing backslash means the literal is unterminated (`?`).
            '\\' => match chars.next()? {
                esc @ ('"' | '\'' | '\\') => value.push(esc),
                other => {
                    value.push('\\');
                    value.push(other);
                }
            },
            c if c == quote => return Some(value),
            c => value.push(c),
        }
    }
    None
}
119
/// Parse a list body (content between `[` and `]`) and return each quoted
/// string entry.
///
/// Scanning rules:
/// - `#` outside a string starts a Python comment that runs to the end of the
///   line, so commented-out entries are ignored and quotes inside comments
///   cannot derail the scan;
/// - inside a string a backslash escapes the next character; `\"`, `\'` and
///   `\\` are unescaped, other escape sequences are kept verbatim;
/// - an unterminated string ends the scan; entries collected so far are
///   returned and the partial one is discarded.
fn parse_string_list(body: &str) -> Vec<String> {
    let mut items = Vec::new();
    let mut chars = body.chars();

    while let Some(ch) = chars.next() {
        match ch {
            '#' => {
                // Skip the rest of the comment line.
                for c in chars.by_ref() {
                    if c == '\n' {
                        break;
                    }
                }
            }
            '"' | '\'' => {
                let quote = ch;
                let mut item = String::new();
                let mut closed = false;
                while let Some(c) = chars.next() {
                    if c == '\\' {
                        match chars.next() {
                            Some(esc @ ('"' | '\'' | '\\')) => item.push(esc),
                            Some(other) => {
                                item.push('\\');
                                item.push(other);
                            }
                            None => break,
                        }
                    } else if c == quote {
                        closed = true;
                        break;
                    } else {
                        item.push(c);
                    }
                }
                if !closed {
                    break;
                }
                items.push(item);
            }
            _ => {}
        }
    }
    items
}
143
144/// Parse the body of `extras_require={...}`.
145///
146/// Structure: `"group": ["pkg1", "pkg2"], "other": [...]`
147///
148/// Groups matching `dev`, `test`, `testing`, `tests` become Dev; others become
149/// Optional.
150fn parse_extras_require(body: &str) -> Vec<DeclaredDep> {
151    let dev_groups = ["dev", "test", "testing", "tests", "develop", "development"];
152    let mut deps = Vec::new();
153    let mut remaining = body;
154
155    while let Some(quote_pos) = remaining.find(['"', '\'']) {
156        let quote = if remaining.as_bytes()[quote_pos] == b'"' {
157            '"'
158        } else {
159            '\''
160        };
161        let after_open = &remaining[quote_pos + 1..];
162        let key_end = match after_open.find(quote) {
163            Some(e) => e,
164            None => break,
165        };
166        let group = after_open[..key_end].to_string();
167        remaining = &after_open[key_end + 1..];
168
169        // Find the `[` starting the list for this group
170        let bracket_pos = match remaining.find('[') {
171            Some(p) => p,
172            None => break,
173        };
174        remaining = &remaining[bracket_pos + 1..];
175        let list_body = collect_until_close(remaining, '[', ']');
176        // Advance past the list
177        let consumed = list_body.len() + 1; // +1 for the closing `]`
178        if consumed <= remaining.len() {
179            remaining = &remaining[consumed..];
180        } else {
181            remaining = "";
182        }
183
184        let kind = if dev_groups.contains(&group.as_str()) {
185            DepKind::Dev
186        } else {
187            DepKind::Optional
188        };
189
190        for item in parse_string_list(&list_body) {
191            if let Some(mut dep) = parse_pip_requirement(&item) {
192                dep.kind = kind;
193                deps.push(dep);
194            }
195        }
196    }
197
198    deps
199}
200
201// ── Tests ─────────────────────────────────────────────────────────────────────
202
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ManifestParser;

    /// Parse `source` with [`SetupPyParser`], panicking if parsing fails.
    fn parse_ok(source: &str) -> ParsedManifest {
        SetupPyParser.parse(source).unwrap()
    }

    /// Dependencies of `manifest` whose kind equals `kind`, in manifest order.
    fn with_kind(manifest: &ParsedManifest, kind: DepKind) -> Vec<&DeclaredDep> {
        manifest.dependencies.iter().filter(|d| d.kind == kind).collect()
    }

    /// First dependency called `name`; panics when there is none.
    fn named<'a>(manifest: &'a ParsedManifest, name: &str) -> &'a DeclaredDep {
        manifest.dependencies.iter().find(|d| d.name == name).unwrap()
    }

    /// Name, version and two pinned runtime requirements are all picked up.
    #[test]
    fn test_basic_install_requires() {
        let manifest = parse_ok(
            r#"
from setuptools import setup

setup(
    name="mypackage",
    version="1.0.0",
    install_requires=[
        "requests>=2.28.0",
        "click>=8.0",
    ],
)
"#,
        );
        assert_eq!(manifest.ecosystem, "python");
        assert_eq!(manifest.name.as_deref(), Some("mypackage"));
        assert_eq!(manifest.version.as_deref(), Some("1.0.0"));
        assert_eq!(manifest.dependencies.len(), 2);

        let requests = named(&manifest, "requests");
        assert_eq!(requests.version_req.as_deref(), Some(">=2.28.0"));
        assert_eq!(requests.kind, DepKind::Normal);

        let click = named(&manifest, "click");
        assert_eq!(click.version_req.as_deref(), Some(">=8.0"));
        assert_eq!(click.kind, DepKind::Normal);
    }

    /// Single-quoted, multi-line lists work, including an unpinned entry.
    #[test]
    fn test_multiline_install_requires() {
        let manifest = parse_ok(
            r#"
setup(
    name='mypkg',
    install_requires=[
        'flask>=2.0',
        'sqlalchemy',
        'celery>=5.0,<6',
    ],
)
"#,
        );
        assert_eq!(manifest.dependencies.len(), 3);

        let flask = named(&manifest, "flask");
        assert_eq!(flask.version_req.as_deref(), Some(">=2.0"));

        let sqlalchemy = named(&manifest, "sqlalchemy");
        assert!(sqlalchemy.version_req.is_none());
    }

    /// `tests_require` entries are Dev; `install_requires` entries stay Normal.
    #[test]
    fn test_tests_require() {
        let manifest = parse_ok(
            r#"
setup(
    name='mypkg',
    version='0.1.0',
    install_requires=['requests'],
    tests_require=['pytest>=7.0', 'coverage'],
)
"#,
        );

        let normal = with_kind(&manifest, DepKind::Normal);
        assert_eq!(normal.len(), 1);
        assert_eq!(normal[0].name, "requests");

        let dev = with_kind(&manifest, DepKind::Dev);
        assert_eq!(dev.len(), 2);
        assert!(dev.iter().any(|d| d.name == "pytest"));
        assert!(dev.iter().any(|d| d.name == "coverage"));

        let pytest = dev.iter().find(|d| d.name == "pytest").unwrap();
        assert_eq!(pytest.version_req.as_deref(), Some(">=7.0"));
    }

    /// `dev`/`test` extras are Dev, any other extra is Optional.
    #[test]
    fn test_extras_require_dev_and_optional() {
        let manifest = parse_ok(
            r#"
setup(
    name='mypkg',
    version='2.0.0',
    extras_require={
        "dev": ["pytest>=7.0", "black"],
        "test": ["pytest", "coverage"],
        "docs": ["sphinx>=5.0", "myst-parser"],
    },
)
"#,
        );

        // "dev" group: pytest, black; "test" group: pytest, coverage = 4 entries
        let dev = with_kind(&manifest, DepKind::Dev);
        assert_eq!(dev.len(), 4);

        // "docs" group: sphinx, myst-parser = 2 entries
        let optional = with_kind(&manifest, DepKind::Optional);
        assert_eq!(optional.len(), 2);
        assert!(optional.iter().any(|d| d.name == "sphinx"));
    }

    /// The `testing` and `tests` group names also map to Dev.
    #[test]
    fn test_extras_require_testing_group() {
        let manifest = parse_ok(
            r#"
setup(
    extras_require={
        "testing": ["pytest"],
        "tests": ["coverage"],
    },
)
"#,
        );
        assert_eq!(with_kind(&manifest, DepKind::Dev).len(), 2);
    }

    /// A manifest without any requirement keywords yields no dependencies.
    #[test]
    fn test_no_deps() {
        let manifest = parse_ok(
            r#"
from setuptools import setup
setup(
    name="simple",
    version="0.0.1",
)
"#,
        );
        assert_eq!(manifest.name.as_deref(), Some("simple"));
        assert!(manifest.dependencies.is_empty());
    }
}
361        let m = SetupPyParser.parse(content).unwrap();
362        assert_eq!(m.name.as_deref(), Some("simple"));
363        assert!(m.dependencies.is_empty());
364    }
365}