Skip to main content

provenant/parsers/nuget/
nuspec.rs

1use std::fs::File;
2use std::io::BufReader;
3use std::path::Path;
4
5use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
6use crate::parser_warn as warn;
7use packageurl::PackageUrl;
8use quick_xml::Reader;
9use quick_xml::events::Event;
10
11use super::super::PackageParser;
12use super::super::license_normalization::{
13    empty_declared_license_data, normalize_spdx_declared_license,
14};
15use super::super::utils::{MAX_ITERATION_COUNT, truncate_field};
16use super::{
17    build_nuget_description, build_nuget_party, build_nuget_purl, build_nuget_urls,
18    check_file_size, default_package_data, insert_extra_string, parse_repository_metadata,
19};
20
21pub struct NuspecParser;
22
23impl PackageParser for NuspecParser {
24    const PACKAGE_TYPE: PackageType = PackageType::Nuget;
25
26    fn is_match(path: &Path) -> bool {
27        path.extension()
28            .and_then(|ext| ext.to_str())
29            .is_some_and(|ext| ext == "nuspec")
30    }
31
32    fn extract_packages(path: &Path) -> Vec<PackageData> {
33        if let Err(e) = check_file_size(path) {
34            warn!("{}", e);
35            return vec![default_package_data(Some(DatasourceId::NugetNuspec))];
36        }
37
38        let file = match File::open(path) {
39            Ok(f) => f,
40            Err(e) => {
41                warn!("Failed to open .nuspec at {:?}: {}", path, e);
42                return vec![default_package_data(Some(DatasourceId::NugetNuspec))];
43            }
44        };
45
46        let reader = BufReader::new(file);
47        let mut xml_reader = Reader::from_reader(reader);
48        xml_reader.config_mut().trim_text(true);
49
50        let mut name = None;
51        let mut version = None;
52        let mut summary = None;
53        let mut description = None;
54        let mut title = None;
55        let mut homepage_url = None;
56        let mut parties = Vec::new();
57        let mut dependencies = Vec::new();
58        let mut extracted_license_statement = None;
59        let mut license_type = None;
60        let mut copyright = None;
61        let mut vcs_url = None;
62        let mut repository_branch = None;
63        let mut repository_commit = None;
64
65        let mut buf = Vec::new();
66        let mut current_element = String::new();
67        let mut in_metadata = false;
68        let mut in_dependencies = false;
69        let mut current_group_framework = None;
70        let mut iteration_count: usize = 0;
71
72        loop {
73            iteration_count += 1;
74            if iteration_count > MAX_ITERATION_COUNT {
75                warn!(
76                    "Iteration limit exceeded in .nuspec at {:?}; stopping at {} items",
77                    path, MAX_ITERATION_COUNT
78                );
79                break;
80            }
81            match xml_reader.read_event_into(&mut buf) {
82                Ok(Event::Start(e)) => {
83                    let tag_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
84                    current_element = tag_name.clone();
85
86                    if tag_name == "metadata" {
87                        in_metadata = true;
88                    } else if tag_name == "dependencies" && in_metadata {
89                        in_dependencies = true;
90                    } else if tag_name == "group" && in_dependencies {
91                        current_group_framework = e
92                            .attributes()
93                            .filter_map(|a| a.ok())
94                            .find(|attr| attr.key.as_ref() == b"targetFramework")
95                            .and_then(|attr| String::from_utf8(attr.value.to_vec()).ok());
96                    } else if tag_name == "repository" && in_metadata {
97                        let repository = parse_repository_metadata(&e);
98                        vcs_url = repository.vcs_url;
99                        repository_branch = repository.branch;
100                        repository_commit = repository.commit;
101                    } else if tag_name == "license" && in_metadata {
102                        license_type = e
103                            .attributes()
104                            .filter_map(|a| a.ok())
105                            .find(|attr| attr.key.as_ref() == b"type")
106                            .and_then(|attr| String::from_utf8(attr.value.to_vec()).ok());
107                    }
108                }
109                Ok(Event::Empty(e)) => {
110                    let tag_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
111
112                    if tag_name == "dependency" && in_dependencies {
113                        if let Some(dep) =
114                            parse_nuspec_dependency(&e, current_group_framework.as_deref())
115                        {
116                            dependencies.push(dep);
117                        }
118                    } else if tag_name == "repository" && in_metadata {
119                        let repository = parse_repository_metadata(&e);
120                        vcs_url = repository.vcs_url;
121                        repository_branch = repository.branch;
122                        repository_commit = repository.commit;
123                    }
124                }
125                Ok(Event::Text(e)) => {
126                    if !in_metadata {
127                        continue;
128                    }
129
130                    let text = e.decode().ok().map(|s| s.trim().to_string());
131                    if let Some(text) = text.filter(|s| !s.is_empty()) {
132                        match current_element.as_str() {
133                            "id" => name = Some(text),
134                            "version" => version = Some(text),
135                            "summary" => summary = Some(text),
136                            "description" => description = Some(text),
137                            "title" => title = Some(text),
138                            "projectUrl" => homepage_url = Some(text),
139                            "authors" => {
140                                parties.push(build_nuget_party("author", text));
141                            }
142                            "owners" => {
143                                parties.push(build_nuget_party("owner", text));
144                            }
145                            "license" => {
146                                extracted_license_statement = Some(text);
147                            }
148                            "licenseUrl" => {
149                                if extracted_license_statement.is_none() {
150                                    extracted_license_statement = Some(text);
151                                }
152                            }
153                            "copyright" => copyright = Some(text),
154                            _ => {}
155                        }
156                    }
157                }
158                Ok(Event::End(e)) => {
159                    let tag_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
160
161                    if tag_name == "metadata" {
162                        in_metadata = false;
163                    } else if tag_name == "dependencies" {
164                        in_dependencies = false;
165                    } else if tag_name == "group" {
166                        current_group_framework = None;
167                    }
168
169                    current_element.clear();
170                }
171                Ok(Event::Eof) => break,
172                Err(e) => {
173                    warn!("Error parsing .nuspec at {:?}: {}", path, e);
174                    return vec![default_package_data(Some(DatasourceId::NugetNuspec))];
175                }
176                _ => {}
177            }
178            buf.clear();
179        }
180
181        let final_description = build_nuget_description(
182            summary.as_deref(),
183            description.as_deref(),
184            title.as_deref(),
185            name.as_deref(),
186        );
187
188        let (repository_homepage_url, repository_download_url, api_data_url) =
189            build_nuget_urls(name.as_deref(), version.as_deref());
190
191        let purl = build_nuget_purl(name.as_deref(), version.as_deref());
192
193        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
194            if license_type.as_deref() == Some("expression") {
195                normalize_spdx_declared_license(extracted_license_statement.as_deref())
196            } else {
197                empty_declared_license_data()
198            };
199
200        let holder = None;
201
202        let mut extra_data = serde_json::Map::new();
203        insert_extra_string(&mut extra_data, "license_type", license_type.clone());
204        if license_type.as_deref() == Some("file") {
205            insert_extra_string(
206                &mut extra_data,
207                "license_file",
208                extracted_license_statement.clone(),
209            );
210        }
211        insert_extra_string(&mut extra_data, "repository_branch", repository_branch);
212        insert_extra_string(&mut extra_data, "repository_commit", repository_commit);
213
214        vec![PackageData {
215            datasource_id: Some(DatasourceId::NugetNuspec),
216            package_type: Some(Self::PACKAGE_TYPE),
217            name: name.map(truncate_field),
218            version: version.map(truncate_field),
219            purl,
220            description: final_description.map(truncate_field),
221            homepage_url: homepage_url.map(truncate_field),
222            parties,
223            dependencies,
224            declared_license_expression,
225            declared_license_expression_spdx,
226            license_detections,
227            extracted_license_statement: extracted_license_statement.map(truncate_field),
228            copyright: copyright.map(truncate_field),
229            holder,
230            vcs_url: vcs_url.map(truncate_field),
231            extra_data: if extra_data.is_empty() {
232                None
233            } else {
234                Some(extra_data.into_iter().collect())
235            },
236            repository_homepage_url,
237            repository_download_url,
238            api_data_url,
239            ..default_package_data(Some(DatasourceId::NugetNuspec))
240        }]
241    }
242}
243
244pub(super) fn parse_nuspec_dependency(
245    element: &quick_xml::events::BytesStart,
246    framework: Option<&str>,
247) -> Option<Dependency> {
248    let mut id = None;
249    let mut version = None;
250    let mut include = None;
251    let mut exclude = None;
252
253    for attr in element.attributes().filter_map(|a| a.ok()) {
254        match attr.key.as_ref() {
255            b"id" => id = String::from_utf8(attr.value.to_vec()).ok(),
256            b"version" => version = String::from_utf8(attr.value.to_vec()).ok(),
257            b"include" => include = String::from_utf8(attr.value.to_vec()).ok(),
258            b"exclude" => exclude = String::from_utf8(attr.value.to_vec()).ok(),
259            _ => {}
260        }
261    }
262
263    let name = id?;
264    let purl = PackageUrl::new("nuget", &name).ok().map(|p| p.to_string());
265
266    let mut extra_data = serde_json::Map::new();
267    if let Some(fw) = framework {
268        extra_data.insert(
269            "framework".to_string(),
270            serde_json::Value::String(fw.to_string()),
271        );
272    }
273    if let Some(inc) = include {
274        extra_data.insert("include".to_string(), serde_json::Value::String(inc));
275    }
276    if let Some(exc) = exclude {
277        extra_data.insert("exclude".to_string(), serde_json::Value::String(exc));
278    }
279
280    Some(Dependency {
281        purl,
282        extracted_requirement: version,
283        scope: Some("dependency".to_string()),
284        is_runtime: Some(true),
285        is_optional: Some(false),
286        is_pinned: Some(false),
287        is_direct: Some(true),
288        resolved_package: None,
289        extra_data: if extra_data.is_empty() {
290            None
291        } else {
292            Some(extra_data.into_iter().collect())
293        },
294    })
295}
296
297pub(super) fn parse_nuspec_content(content: &str) -> Result<PackageData, String> {
298    use quick_xml::Reader;
299
300    let mut xml_reader = Reader::from_str(content);
301    xml_reader.config_mut().trim_text(true);
302
303    let mut name = None;
304    let mut version = None;
305    let mut description = None;
306    let mut homepage_url = None;
307    let mut parties = Vec::new();
308    let mut dependencies = Vec::new();
309    let mut extracted_license_statement = None;
310    let mut license_type = None;
311    let mut copyright = None;
312    let mut vcs_url = None;
313    let mut repository_branch = None;
314    let mut repository_commit = None;
315
316    let mut buf = Vec::new();
317    let mut current_element = String::new();
318    let mut in_metadata = false;
319    let mut in_dependencies = false;
320    let mut current_group_framework = None;
321    let mut iteration_count: usize = 0;
322
323    loop {
324        iteration_count += 1;
325        if iteration_count > MAX_ITERATION_COUNT {
326            return Err(format!(
327                "Iteration limit exceeded parsing .nuspec content; stopping at {} items",
328                MAX_ITERATION_COUNT
329            ));
330        }
331        match xml_reader.read_event_into(&mut buf) {
332            Ok(Event::Start(e)) => {
333                let tag_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
334                current_element = tag_name.clone();
335
336                if tag_name == "metadata" {
337                    in_metadata = true;
338                } else if tag_name == "dependencies" && in_metadata {
339                    in_dependencies = true;
340                } else if tag_name == "group" && in_dependencies {
341                    current_group_framework = e
342                        .attributes()
343                        .filter_map(|a| a.ok())
344                        .find(|attr| attr.key.as_ref() == b"targetFramework")
345                        .and_then(|attr| String::from_utf8(attr.value.to_vec()).ok());
346                } else if tag_name == "repository" && in_metadata {
347                    let repository = parse_repository_metadata(&e);
348                    vcs_url = repository.vcs_url;
349                    repository_branch = repository.branch;
350                    repository_commit = repository.commit;
351                } else if tag_name == "license" && in_metadata {
352                    license_type = e
353                        .attributes()
354                        .filter_map(|a| a.ok())
355                        .find(|attr| attr.key.as_ref() == b"type")
356                        .and_then(|attr| String::from_utf8(attr.value.to_vec()).ok());
357                }
358            }
359            Ok(Event::Empty(e)) => {
360                let tag_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
361
362                if tag_name == "dependency" && in_dependencies {
363                    if let Some(dep) =
364                        parse_nuspec_dependency(&e, current_group_framework.as_deref())
365                    {
366                        dependencies.push(dep);
367                    }
368                } else if tag_name == "repository" && in_metadata {
369                    let repository = parse_repository_metadata(&e);
370                    vcs_url = repository.vcs_url;
371                    repository_branch = repository.branch;
372                    repository_commit = repository.commit;
373                }
374            }
375            Ok(Event::Text(e)) => {
376                if !in_metadata {
377                    continue;
378                }
379
380                let text = e.decode().ok().map(|s| s.trim().to_string());
381                if let Some(text) = text.filter(|s| !s.is_empty()) {
382                    match current_element.as_str() {
383                        "id" => name = Some(text),
384                        "version" => version = Some(text),
385                        "description" => description = Some(text),
386                        "projectUrl" => homepage_url = Some(text),
387                        "authors" => {
388                            parties.push(build_nuget_party("author", text));
389                        }
390                        "owners" => {
391                            parties.push(build_nuget_party("owner", text));
392                        }
393                        "license" => {
394                            extracted_license_statement = Some(text);
395                        }
396                        "licenseUrl" => {
397                            if extracted_license_statement.is_none() {
398                                extracted_license_statement = Some(text);
399                            }
400                        }
401                        "copyright" => copyright = Some(text),
402                        _ => {}
403                    }
404                }
405            }
406            Ok(Event::End(e)) => {
407                let tag_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
408
409                if tag_name == "metadata" {
410                    in_metadata = false;
411                } else if tag_name == "dependencies" {
412                    in_dependencies = false;
413                } else if tag_name == "group" {
414                    current_group_framework = None;
415                }
416
417                current_element.clear();
418            }
419            Ok(Event::Eof) => break,
420            Err(e) => {
421                return Err(format!("XML parsing error: {}", e));
422            }
423            _ => {}
424        }
425        buf.clear();
426    }
427
428    let (repository_homepage_url, repository_download_url, api_data_url) =
429        build_nuget_urls(name.as_deref(), version.as_deref());
430
431    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
432        if license_type.as_deref() == Some("expression") {
433            normalize_spdx_declared_license(extracted_license_statement.as_deref())
434        } else {
435            empty_declared_license_data()
436        };
437
438    let holder = None;
439
440    let mut extra_data = serde_json::Map::new();
441    insert_extra_string(&mut extra_data, "license_type", license_type.clone());
442    if license_type.as_deref() == Some("file") {
443        insert_extra_string(
444            &mut extra_data,
445            "license_file",
446            extracted_license_statement.clone(),
447        );
448    }
449    insert_extra_string(&mut extra_data, "repository_branch", repository_branch);
450    insert_extra_string(&mut extra_data, "repository_commit", repository_commit);
451
452    Ok(PackageData {
453        datasource_id: Some(DatasourceId::NugetNupkg),
454        package_type: Some(super::nupkg::NupkgParser::PACKAGE_TYPE),
455        name: name.map(truncate_field),
456        version: version.map(truncate_field),
457        description: description.map(truncate_field),
458        homepage_url: homepage_url.map(truncate_field),
459        parties,
460        dependencies,
461        declared_license_expression,
462        declared_license_expression_spdx,
463        license_detections,
464        extracted_license_statement: extracted_license_statement.map(truncate_field),
465        copyright: copyright.map(truncate_field),
466        holder,
467        vcs_url: vcs_url.map(truncate_field),
468        extra_data: if extra_data.is_empty() {
469            None
470        } else {
471            Some(extra_data.into_iter().collect())
472        },
473        repository_homepage_url,
474        repository_download_url,
475        api_data_url,
476        ..default_package_data(Some(DatasourceId::NugetNupkg))
477    })
478}
479
// Registers descriptive metadata for the .nuspec parser through the
// crate-level `register_parser!` macro.
// NOTE(review): the arguments look like (description, path glob patterns,
// package type, primary language, documentation URL) — confirm against the
// macro definition.
crate::register_parser!(
    ".NET .nuspec package specification",
    &["**/*.nuspec"],
    "nuget",
    "C#",
    Some("https://learn.microsoft.com/en-us/nuget/reference/nuspec"),
);