Skip to main content

provenant/parsers/
conan.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for Conan C/C++ package manager manifests.
5//!
6//! Extracts package metadata and dependencies from Conan manifest files.
7//!
8//! # Supported Formats
9//! - conanfile.py (Recipe files with Python AST parsing)
10//! - conanfile.txt (Simple dependency specification format)
11//! - conan.lock (Lockfile with resolved dependency graph)
12//!
13//! # Key Features
14//! - AST-based conanfile.py parsing (NO code execution)
15//! - Dependency extraction from [requires] and [build_requires] sections
16//! - Version constraint parsing for Conan reference format (name/version@user/channel)
17//! - Package URL (purl) generation for resolved dependencies
18//! - Lockfile dependency graph parsing
19//!
20//! # Implementation Notes
21//! - conanfile.py: AST extracts class attributes and self.requires() calls
22//! - conanfile.txt sections: [requires] = runtime, [build_requires] = build-time
23//! - conan.lock uses JSON format with graph_lock.nodes structure
24//! - Version constraints use Conan-specific operators: [>, <, ranges]
25//! - Only exact versions (without operators) are extracted as pinned versions
26
27use std::path::Path;
28
29use crate::parser_warn as warn;
30use packageurl::PackageUrl;
31use ruff_python_ast as ast;
32use ruff_python_parser::parse_module;
33use serde_json::Value;
34
35use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
36
37use super::PackageParser;
38use super::license_normalization::{
39    DeclaredLicenseMatchMetadata, build_declared_license_data, normalize_declared_license_key,
40};
41use super::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
42
/// Maximum statement nesting depth followed while searching method bodies
/// for `self.<method>(...)` calls; deeper statements are skipped with a warning.
const MAX_AST_DEPTH: usize = 50;
/// Maximum number of statements visited per method body while searching for
/// `self.<method>(...)` calls; the walk stops once this budget is used up.
const MAX_AST_NODES: usize = 10_000;
45
46/// Conan conanfile.py recipe parser.
47///
48/// Parses Python-based Conan recipe files using AST analysis (no code execution).
49/// Extracts package metadata and dependencies from ConanFile class attributes.
50pub struct ConanFilePyParser;
51
52impl PackageParser for ConanFilePyParser {
53    const PACKAGE_TYPE: PackageType = PackageType::Conan;
54
55    fn is_match(path: &Path) -> bool {
56        path.file_name().is_some_and(|name| name == "conanfile.py")
57    }
58
59    fn extract_packages(path: &Path) -> Vec<PackageData> {
60        let contents = match read_file_to_string(path, None) {
61            Ok(c) => c,
62            Err(e) => {
63                warn!("Failed to read {}: {}", path.display(), e);
64                return vec![default_package_data(DatasourceId::ConanConanFilePy)];
65            }
66        };
67
68        vec![match parse_module(&contents) {
69            Ok(parsed) => parse_conanfile_py(parsed.suite()),
70            Err(e) => {
71                warn!("Failed to parse Python AST in {}: {}", path.display(), e);
72                default_package_data(DatasourceId::ConanConanFilePy)
73            }
74        }]
75    }
76
77    fn metadata() -> Vec<super::metadata::ParserMetadata> {
78        vec![super::metadata::ParserMetadata {
79            description: "Conan C/C++ package manifest",
80            file_patterns: &["**/conanfile.py", "**/conanfile.txt", "**/conan.lock"],
81            package_type: "conan",
82            primary_language: "C++",
83            documentation_url: Some("https://docs.conan.io/"),
84        }]
85    }
86}
87
88/// Parse conanfile.py AST to extract ConanFile class attributes
89fn parse_conanfile_py(statements: &[ast::Stmt]) -> PackageData {
90    for stmt in statements {
91        if let ast::Stmt::ClassDef(class_def) = stmt
92            && has_conanfile_base(class_def)
93        {
94            return extract_conanfile_data(class_def);
95        }
96    }
97
98    default_package_data(DatasourceId::ConanConanFilePy)
99}
100
101/// Check if class inherits from ConanFile
102fn has_conanfile_base(class_def: &ast::StmtClassDef) -> bool {
103    class_def.bases().iter().any(|base| {
104        if let ast::Expr::Name(ast::ExprName { id, .. }) = base {
105            id.as_str() == "ConanFile"
106        } else {
107            false
108        }
109    })
110}
111
112/// Extract package data from ConanFile class definition
113fn extract_conanfile_data(class_def: &ast::StmtClassDef) -> PackageData {
114    let mut name = None;
115    let mut version = None;
116    let mut description = None;
117    let mut _author = None;
118    let mut homepage_url = None;
119    let mut vcs_url = None;
120    let mut license_list = Vec::new();
121    let mut keywords = Vec::new();
122    let mut requires_list = Vec::new();
123    let mut tool_requires_list = Vec::new();
124
125    for stmt in class_def.body.iter().take(MAX_ITERATION_COUNT) {
126        match stmt {
127            ast::Stmt::Assign(ast::StmtAssign { targets, value, .. }) => {
128                if let Some(target_name) = get_assignment_target(targets) {
129                    match target_name.as_str() {
130                        "name" => name = get_string_value(value).map(truncate_field),
131                        "version" => version = get_string_value(value).map(truncate_field),
132                        "description" => description = get_string_value(value).map(truncate_field),
133                        "author" => _author = get_string_value(value).map(truncate_field),
134                        "homepage" => homepage_url = get_string_value(value).map(truncate_field),
135                        "url" => vcs_url = get_string_value(value).map(truncate_field),
136                        "license" => {
137                            license_list = get_list_values(value)
138                                .into_iter()
139                                .map(truncate_field)
140                                .collect()
141                        }
142                        "topics" => {
143                            keywords = get_list_values(value)
144                                .into_iter()
145                                .map(truncate_field)
146                                .collect()
147                        }
148                        "requires" => {
149                            requires_list = get_list_values(value)
150                                .into_iter()
151                                .map(truncate_field)
152                                .collect()
153                        }
154                        _ => {}
155                    }
156                }
157            }
158            ast::Stmt::FunctionDef(ast::StmtFunctionDef { body, .. }) => {
159                if let Some(requires) = extract_self_requires_calls(body, "requires") {
160                    requires_list.extend(requires);
161                }
162                if let Some(tool_requires) = extract_self_requires_calls(body, "tool_requires") {
163                    tool_requires_list.extend(tool_requires);
164                }
165            }
166            _ => {}
167        }
168    }
169
170    let mut dependencies = requires_list
171        .into_iter()
172        .filter_map(|req| parse_conan_reference(&req))
173        .collect::<Vec<_>>();
174    dependencies.extend(
175        tool_requires_list
176            .into_iter()
177            .filter_map(|req| parse_conan_reference(&req))
178            .map(|dep| Dependency {
179                scope: Some("build".to_string()),
180                is_runtime: Some(false),
181                ..dep
182            }),
183    );
184
185    let extracted_license = if !license_list.is_empty() {
186        Some(truncate_field(license_list.join(", ")))
187    } else {
188        None
189    };
190    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
191        if license_list.len() == 1 {
192            if let Some(normalized) = normalize_declared_license_key(&license_list[0]) {
193                let (expr, spdx, detections) = build_declared_license_data(
194                    normalized,
195                    DeclaredLicenseMatchMetadata::single_line(&license_list[0]),
196                );
197                (
198                    expr.map(truncate_field),
199                    spdx.map(truncate_field),
200                    detections,
201                )
202            } else {
203                (None, None, Vec::new())
204            }
205        } else {
206            (None, None, Vec::new())
207        };
208
209    PackageData {
210        name,
211        version,
212        description,
213        homepage_url,
214        vcs_url,
215        keywords,
216        dependencies,
217        declared_license_expression,
218        declared_license_expression_spdx,
219        license_detections,
220        extracted_license_statement: extracted_license,
221        datasource_id: Some(DatasourceId::ConanConanFilePy),
222        ..default_package_data(DatasourceId::ConanConanFilePy)
223    }
224}
225
226/// Get assignment target name (e.g., "name" from "name = 'foo'")
227fn get_assignment_target(targets: &[ast::Expr]) -> Option<String> {
228    targets.first().and_then(|target| {
229        if let ast::Expr::Name(ast::ExprName { id, .. }) = target {
230            Some(id.to_string())
231        } else {
232            None
233        }
234    })
235}
236
237/// Extract string value from AST expression
238fn get_string_value(expr: &ast::Expr) -> Option<String> {
239    match expr {
240        ast::Expr::StringLiteral(ast::ExprStringLiteral { value, .. }) => {
241            Some(value.to_str().to_string())
242        }
243        _ => None,
244    }
245}
246
247/// Extract list of strings from tuple or list expression
248fn get_list_values(expr: &ast::Expr) -> Vec<String> {
249    match expr {
250        ast::Expr::Tuple(ast::ExprTuple { elts, .. }) => {
251            elts.iter().filter_map(get_string_value).collect()
252        }
253        ast::Expr::List(ast::ExprList { elts, .. }) => {
254            elts.iter().filter_map(get_string_value).collect()
255        }
256        _ => {
257            if let Some(s) = get_string_value(expr) {
258                vec![s]
259            } else {
260                Vec::new()
261            }
262        }
263    }
264}
265
266/// Extract self.requires() method calls from function body
267fn extract_self_requires_calls(body: &[ast::Stmt], method_name: &str) -> Option<Vec<String>> {
268    let mut requires = Vec::new();
269    let mut node_count = 0usize;
270
271    for stmt in body {
272        collect_self_method_calls(stmt, method_name, &mut requires, 0, &mut node_count);
273        if node_count >= MAX_AST_NODES {
274            warn!(
275                "Exceeded MAX_AST_NODES ({}) in extract_self_requires_calls",
276                MAX_AST_NODES
277            );
278            break;
279        }
280    }
281
282    if requires.is_empty() {
283        None
284    } else {
285        Some(requires)
286    }
287}
288
289fn collect_self_method_calls(
290    stmt: &ast::Stmt,
291    method_name: &str,
292    out: &mut Vec<String>,
293    depth: usize,
294    node_count: &mut usize,
295) {
296    if depth > MAX_AST_DEPTH {
297        warn!(
298            "Exceeded MAX_AST_DEPTH ({}) in collect_self_method_calls",
299            MAX_AST_DEPTH
300        );
301        return;
302    }
303    *node_count += 1;
304    if *node_count > MAX_AST_NODES {
305        return;
306    }
307
308    match stmt {
309        ast::Stmt::Expr(ast::StmtExpr { value, .. }) => {
310            if let ast::Expr::Call(call) = value.as_ref()
311                && is_self_method_call(call, method_name)
312                && let Some(arg) = call.arguments.args.first()
313                && let Some(req) = get_string_value(arg)
314            {
315                out.push(truncate_field(req));
316            }
317        }
318        ast::Stmt::If(ast::StmtIf {
319            body,
320            elif_else_clauses,
321            ..
322        }) => {
323            for nested in body {
324                collect_self_method_calls(nested, method_name, out, depth + 1, node_count);
325            }
326            for clause in elif_else_clauses {
327                for nested in &clause.body {
328                    collect_self_method_calls(nested, method_name, out, depth + 1, node_count);
329                }
330            }
331        }
332        ast::Stmt::With(ast::StmtWith { body, .. })
333        | ast::Stmt::While(ast::StmtWhile { body, .. })
334        | ast::Stmt::For(ast::StmtFor { body, .. }) => {
335            for nested in body {
336                collect_self_method_calls(nested, method_name, out, depth + 1, node_count);
337            }
338        }
339        ast::Stmt::Try(ast::StmtTry {
340            body,
341            handlers,
342            orelse,
343            finalbody,
344            ..
345        }) => {
346            for nested in body.iter().chain(orelse.iter()).chain(finalbody.iter()) {
347                collect_self_method_calls(nested, method_name, out, depth + 1, node_count);
348            }
349            for handler in handlers {
350                let ast::ExceptHandler::ExceptHandler(handler) = handler;
351                for nested in &handler.body {
352                    collect_self_method_calls(nested, method_name, out, depth + 1, node_count);
353                }
354            }
355        }
356        ast::Stmt::Match(ast::StmtMatch { cases, .. }) => {
357            for case in cases {
358                for nested in &case.body {
359                    collect_self_method_calls(nested, method_name, out, depth + 1, node_count);
360                }
361            }
362        }
363        _ => {}
364    }
365}
366
367fn is_self_method_call(call: &ast::ExprCall, method_name: &str) -> bool {
368    if let ast::Expr::Attribute(ast::ExprAttribute { value, attr, .. }) = call.func.as_ref()
369        && let ast::Expr::Name(ast::ExprName { id, .. }) = value.as_ref()
370    {
371        return id.as_str() == "self" && attr.as_str() == method_name;
372    }
373    false
374}
375
376/// Conan conanfile.txt manifest parser.
377///
378/// Extracts dependencies from the simple conanfile.txt format, which uses
379/// INI-style sections to specify runtime and build-time dependencies.
380pub struct ConanfileTxtParser;
381
382impl PackageParser for ConanfileTxtParser {
383    const PACKAGE_TYPE: PackageType = PackageType::Conan;
384
385    fn is_match(path: &Path) -> bool {
386        path.file_name().is_some_and(|name| name == "conanfile.txt")
387    }
388
389    fn extract_packages(path: &Path) -> Vec<PackageData> {
390        let contents = match read_file_to_string(path, None) {
391            Ok(c) => c,
392            Err(e) => {
393                warn!("Failed to read {}: {}", path.display(), e);
394                return vec![default_package_data(DatasourceId::ConanConanFileTxt)];
395            }
396        };
397
398        let dependencies = parse_conanfile_txt(&contents);
399
400        vec![PackageData {
401            package_type: Some(Self::PACKAGE_TYPE),
402            dependencies,
403            primary_language: Some("C++".to_string()),
404            datasource_id: Some(DatasourceId::ConanConanFileTxt),
405            ..default_package_data(DatasourceId::ConanConanFileTxt)
406        }]
407    }
408}
409
410/// Conan lockfile (conan.lock) parser.
411///
412/// Extracts resolved dependencies from Conan lockfiles, which capture the
413/// complete dependency graph with exact versions and revisions.
414pub struct ConanLockParser;
415
416impl PackageParser for ConanLockParser {
417    const PACKAGE_TYPE: PackageType = PackageType::Conan;
418
419    fn is_match(path: &Path) -> bool {
420        path.file_name().is_some_and(|name| name == "conan.lock")
421    }
422
423    fn extract_packages(path: &Path) -> Vec<PackageData> {
424        let contents = match read_file_to_string(path, None) {
425            Ok(c) => c,
426            Err(e) => {
427                warn!("Failed to read {}: {}", path.display(), e);
428                return vec![default_package_data(DatasourceId::ConanLock)];
429            }
430        };
431
432        let json: Value = match serde_json::from_str(&contents) {
433            Ok(j) => j,
434            Err(e) => {
435                warn!("Failed to parse JSON in {}: {}", path.display(), e);
436                return vec![default_package_data(DatasourceId::ConanLock)];
437            }
438        };
439
440        let dependencies = parse_conan_lock(&json);
441
442        vec![PackageData {
443            package_type: Some(Self::PACKAGE_TYPE),
444            dependencies,
445            primary_language: Some("C++".to_string()),
446            datasource_id: Some(DatasourceId::ConanLock),
447            ..default_package_data(DatasourceId::ConanLock)
448        }]
449    }
450}
451
452fn parse_conan_reference(ref_str: &str) -> Option<Dependency> {
453    let (name, version_spec) = if let Some((n, v)) = ref_str.split_once('/') {
454        (n.trim(), Some(truncate_field(v.trim().to_string())))
455    } else {
456        (ref_str.trim(), None)
457    };
458
459    let version = version_spec.as_ref().and_then(|v| {
460        if !v.contains('[') && !v.contains('>') && !v.contains('<') {
461            Some(v.clone())
462        } else {
463            None
464        }
465    });
466
467    let purl = if let Some(v) = version.as_deref() {
468        PackageUrl::new("conan", name)
469            .map(|mut p| {
470                let _ = p.with_version(v);
471                p.to_string()
472            })
473            .unwrap_or_else(|_| format!("pkg:conan/{}", name))
474    } else {
475        format!("pkg:conan/{}", name)
476    };
477
478    let is_pinned = version_spec
479        .as_ref()
480        .map(|v| !v.contains('[') && !v.contains('>') && !v.contains('<'))
481        .unwrap_or(false);
482
483    Some(Dependency {
484        purl: Some(truncate_field(purl)),
485        extracted_requirement: version_spec,
486        scope: Some("install".to_string()),
487        is_runtime: Some(true),
488        is_optional: Some(false),
489        is_pinned: Some(is_pinned),
490        is_direct: Some(true),
491        resolved_package: None,
492        extra_data: None,
493    })
494}
495
496fn parse_conanfile_txt(contents: &str) -> Vec<Dependency> {
497    let mut dependencies = Vec::new();
498    let mut current_section = None;
499
500    for line in contents.lines().take(MAX_ITERATION_COUNT) {
501        let trimmed = line.trim();
502
503        if trimmed.is_empty() || trimmed.starts_with('#') {
504            continue;
505        }
506
507        if trimmed.starts_with('[') && trimmed.ends_with(']') {
508            current_section = Some(trimmed.trim_matches(|c| c == '[' || c == ']').to_string());
509            continue;
510        }
511
512        if let Some(ref section) = current_section {
513            let (scope, is_runtime) = match section.as_str() {
514                "requires" => ("install", true),
515                "build_requires" => ("build", false),
516                _ => continue,
517            };
518
519            if let Some(dep) = parse_conan_reference(trimmed) {
520                dependencies.push(Dependency {
521                    scope: Some(scope.to_string()),
522                    is_runtime: Some(is_runtime),
523                    ..dep
524                });
525            }
526        }
527    }
528
529    dependencies
530}
531
532fn parse_conan_lock(json: &Value) -> Vec<Dependency> {
533    let mut dependencies = Vec::new();
534
535    if let Some(graph_lock) = json.get("graph_lock")
536        && let Some(nodes) = graph_lock.get("nodes").and_then(|n| n.as_object())
537    {
538        for (_node_id, node_data) in nodes.iter().take(MAX_ITERATION_COUNT) {
539            if let Some(ref_str) = node_data.get("ref").and_then(|r| r.as_str())
540                && !ref_str.is_empty()
541                && ref_str != "conanfile"
542                && let Some(dep) = parse_conan_reference(ref_str)
543            {
544                dependencies.push(dep);
545            }
546        }
547    }
548
549    dependencies
550}
551
552fn default_package_data(datasource_id: DatasourceId) -> PackageData {
553    PackageData {
554        package_type: Some(ConanFilePyParser::PACKAGE_TYPE),
555        primary_language: Some("C++".to_string()),
556        datasource_id: Some(datasource_id),
557        ..Default::default()
558    }
559}