Skip to main content

provenant/parsers/nuget/
nuspec.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::fs::File;
5use std::io::BufReader;
6use std::path::Path;
7
8use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
9use crate::parser_warn as warn;
10use packageurl::PackageUrl;
11use quick_xml::Reader;
12use quick_xml::events::Event;
13
14use super::super::PackageParser;
15use super::super::license_normalization::{
16    empty_declared_license_data, normalize_spdx_declared_license,
17};
18use super::super::utils::{MAX_ITERATION_COUNT, truncate_field};
19use super::{
20    build_nuget_description, build_nuget_party, build_nuget_purl, build_nuget_urls,
21    check_file_size, default_package_data, insert_extra_string, parse_repository_metadata,
22};
23
/// Zero-sized marker type that carries the [`PackageParser`] implementation
/// for standalone NuGet `.nuspec` manifest files.
pub struct NuspecParser;
25
26impl PackageParser for NuspecParser {
27    const PACKAGE_TYPE: PackageType = PackageType::Nuget;
28
29    fn is_match(path: &Path) -> bool {
30        path.extension()
31            .and_then(|ext| ext.to_str())
32            .is_some_and(|ext| ext == "nuspec")
33    }
34
35    fn extract_packages(path: &Path) -> Vec<PackageData> {
36        if let Err(e) = check_file_size(path) {
37            warn!("{}", e);
38            return vec![default_package_data(Some(DatasourceId::NugetNuspec))];
39        }
40
41        let file = match File::open(path) {
42            Ok(f) => f,
43            Err(e) => {
44                warn!("Failed to open .nuspec at {:?}: {}", path, e);
45                return vec![default_package_data(Some(DatasourceId::NugetNuspec))];
46            }
47        };
48
49        let reader = BufReader::new(file);
50        let mut xml_reader = Reader::from_reader(reader);
51        xml_reader.config_mut().trim_text(true);
52
53        let mut name = None;
54        let mut version = None;
55        let mut summary = None;
56        let mut description = None;
57        let mut title = None;
58        let mut homepage_url = None;
59        let mut parties = Vec::new();
60        let mut dependencies = Vec::new();
61        let mut extracted_license_statement = None;
62        let mut license_type = None;
63        let mut copyright = None;
64        let mut vcs_url = None;
65        let mut repository_branch = None;
66        let mut repository_commit = None;
67
68        let mut buf = Vec::new();
69        let mut current_element = String::new();
70        let mut in_metadata = false;
71        let mut in_dependencies = false;
72        let mut current_group_framework = None;
73        let mut iteration_count: usize = 0;
74
75        loop {
76            iteration_count += 1;
77            if iteration_count > MAX_ITERATION_COUNT {
78                warn!(
79                    "Iteration limit exceeded in .nuspec at {:?}; stopping at {} items",
80                    path, MAX_ITERATION_COUNT
81                );
82                break;
83            }
84            match xml_reader.read_event_into(&mut buf) {
85                Ok(Event::Start(e)) => {
86                    let tag_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
87                    current_element = tag_name.clone();
88
89                    if tag_name == "metadata" {
90                        in_metadata = true;
91                    } else if tag_name == "dependencies" && in_metadata {
92                        in_dependencies = true;
93                    } else if tag_name == "group" && in_dependencies {
94                        current_group_framework = e
95                            .attributes()
96                            .filter_map(|a| a.ok())
97                            .find(|attr| attr.key.as_ref() == b"targetFramework")
98                            .and_then(|attr| String::from_utf8(attr.value.to_vec()).ok());
99                    } else if tag_name == "repository" && in_metadata {
100                        let repository = parse_repository_metadata(&e);
101                        vcs_url = repository.vcs_url;
102                        repository_branch = repository.branch;
103                        repository_commit = repository.commit;
104                    } else if tag_name == "license" && in_metadata {
105                        license_type = e
106                            .attributes()
107                            .filter_map(|a| a.ok())
108                            .find(|attr| attr.key.as_ref() == b"type")
109                            .and_then(|attr| String::from_utf8(attr.value.to_vec()).ok());
110                    }
111                }
112                Ok(Event::Empty(e)) => {
113                    let tag_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
114
115                    if tag_name == "dependency" && in_dependencies {
116                        if let Some(dep) =
117                            parse_nuspec_dependency(&e, current_group_framework.as_deref())
118                        {
119                            dependencies.push(dep);
120                        }
121                    } else if tag_name == "repository" && in_metadata {
122                        let repository = parse_repository_metadata(&e);
123                        vcs_url = repository.vcs_url;
124                        repository_branch = repository.branch;
125                        repository_commit = repository.commit;
126                    }
127                }
128                Ok(Event::Text(e)) => {
129                    if !in_metadata {
130                        continue;
131                    }
132
133                    let text = e.decode().ok().map(|s| s.trim().to_string());
134                    if let Some(text) = text.filter(|s| !s.is_empty()) {
135                        match current_element.as_str() {
136                            "id" => name = Some(text),
137                            "version" => version = Some(text),
138                            "summary" => summary = Some(text),
139                            "description" => description = Some(text),
140                            "title" => title = Some(text),
141                            "projectUrl" => homepage_url = Some(text),
142                            "authors" => {
143                                parties.push(build_nuget_party("author", text));
144                            }
145                            "owners" => {
146                                parties.push(build_nuget_party("owner", text));
147                            }
148                            "license" => {
149                                extracted_license_statement = Some(text);
150                            }
151                            "licenseUrl" if extracted_license_statement.is_none() => {
152                                extracted_license_statement = Some(text);
153                            }
154                            "licenseUrl" => {}
155                            "copyright" => copyright = Some(text),
156                            _ => {}
157                        }
158                    }
159                }
160                Ok(Event::End(e)) => {
161                    let tag_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
162
163                    if tag_name == "metadata" {
164                        in_metadata = false;
165                    } else if tag_name == "dependencies" {
166                        in_dependencies = false;
167                    } else if tag_name == "group" {
168                        current_group_framework = None;
169                    }
170
171                    current_element.clear();
172                }
173                Ok(Event::Eof) => break,
174                Err(e) => {
175                    warn!("Error parsing .nuspec at {:?}: {}", path, e);
176                    return vec![default_package_data(Some(DatasourceId::NugetNuspec))];
177                }
178                _ => {}
179            }
180            buf.clear();
181        }
182
183        let final_description = build_nuget_description(
184            summary.as_deref(),
185            description.as_deref(),
186            title.as_deref(),
187            name.as_deref(),
188        );
189
190        let (repository_homepage_url, repository_download_url, api_data_url) =
191            build_nuget_urls(name.as_deref(), version.as_deref());
192
193        let purl = build_nuget_purl(name.as_deref(), version.as_deref());
194
195        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
196            if license_type.as_deref() == Some("expression") {
197                normalize_spdx_declared_license(extracted_license_statement.as_deref())
198            } else {
199                empty_declared_license_data()
200            };
201
202        let holder = None;
203
204        let mut extra_data = serde_json::Map::new();
205        insert_extra_string(&mut extra_data, "license_type", license_type.clone());
206        if license_type.as_deref() == Some("file") {
207            insert_extra_string(
208                &mut extra_data,
209                "license_file",
210                extracted_license_statement.clone(),
211            );
212        }
213        insert_extra_string(&mut extra_data, "repository_branch", repository_branch);
214        insert_extra_string(&mut extra_data, "repository_commit", repository_commit);
215
216        vec![PackageData {
217            datasource_id: Some(DatasourceId::NugetNuspec),
218            package_type: Some(Self::PACKAGE_TYPE),
219            name: name.map(truncate_field),
220            version: version.map(truncate_field),
221            purl,
222            description: final_description.map(truncate_field),
223            homepage_url: homepage_url.map(truncate_field),
224            parties,
225            dependencies,
226            declared_license_expression,
227            declared_license_expression_spdx,
228            license_detections,
229            extracted_license_statement: extracted_license_statement.map(truncate_field),
230            copyright: copyright.map(truncate_field),
231            holder,
232            vcs_url: vcs_url.map(truncate_field),
233            extra_data: if extra_data.is_empty() {
234                None
235            } else {
236                Some(extra_data.into_iter().collect())
237            },
238            repository_homepage_url,
239            repository_download_url,
240            api_data_url,
241            ..default_package_data(Some(DatasourceId::NugetNuspec))
242        }]
243    }
244
245    fn metadata() -> Vec<super::super::metadata::ParserMetadata> {
246        vec![super::super::metadata::ParserMetadata {
247            description: ".NET .nuspec package specification",
248            file_patterns: &["**/*.nuspec"],
249            package_type: "nuget",
250            primary_language: "C#",
251            documentation_url: Some("https://learn.microsoft.com/en-us/nuget/reference/nuspec"),
252        }]
253    }
254}
255
256pub(super) fn parse_nuspec_dependency(
257    element: &quick_xml::events::BytesStart,
258    framework: Option<&str>,
259) -> Option<Dependency> {
260    let mut id = None;
261    let mut version = None;
262    let mut include = None;
263    let mut exclude = None;
264
265    for attr in element.attributes().filter_map(|a| a.ok()) {
266        match attr.key.as_ref() {
267            b"id" => id = String::from_utf8(attr.value.to_vec()).ok(),
268            b"version" => version = String::from_utf8(attr.value.to_vec()).ok(),
269            b"include" => include = String::from_utf8(attr.value.to_vec()).ok(),
270            b"exclude" => exclude = String::from_utf8(attr.value.to_vec()).ok(),
271            _ => {}
272        }
273    }
274
275    let name = id?;
276    let purl = PackageUrl::new("nuget", &name).ok().map(|p| p.to_string());
277
278    let mut extra_data = serde_json::Map::new();
279    if let Some(fw) = framework {
280        extra_data.insert(
281            "framework".to_string(),
282            serde_json::Value::String(fw.to_string()),
283        );
284    }
285    if let Some(inc) = include {
286        extra_data.insert("include".to_string(), serde_json::Value::String(inc));
287    }
288    if let Some(exc) = exclude {
289        extra_data.insert("exclude".to_string(), serde_json::Value::String(exc));
290    }
291
292    Some(Dependency {
293        purl,
294        extracted_requirement: version,
295        scope: Some("dependency".to_string()),
296        is_runtime: Some(true),
297        is_optional: Some(false),
298        is_pinned: Some(false),
299        is_direct: Some(true),
300        resolved_package: None,
301        extra_data: if extra_data.is_empty() {
302            None
303        } else {
304            Some(extra_data.into_iter().collect())
305        },
306    })
307}
308
309pub(super) fn parse_nuspec_content(content: &str) -> Result<PackageData, String> {
310    use quick_xml::Reader;
311
312    let mut xml_reader = Reader::from_str(content);
313    xml_reader.config_mut().trim_text(true);
314
315    let mut name = None;
316    let mut version = None;
317    let mut description = None;
318    let mut homepage_url = None;
319    let mut parties = Vec::new();
320    let mut dependencies = Vec::new();
321    let mut extracted_license_statement = None;
322    let mut license_type = None;
323    let mut copyright = None;
324    let mut vcs_url = None;
325    let mut repository_branch = None;
326    let mut repository_commit = None;
327
328    let mut buf = Vec::new();
329    let mut current_element = String::new();
330    let mut in_metadata = false;
331    let mut in_dependencies = false;
332    let mut current_group_framework = None;
333    let mut iteration_count: usize = 0;
334
335    loop {
336        iteration_count += 1;
337        if iteration_count > MAX_ITERATION_COUNT {
338            return Err(format!(
339                "Iteration limit exceeded parsing .nuspec content; stopping at {} items",
340                MAX_ITERATION_COUNT
341            ));
342        }
343        match xml_reader.read_event_into(&mut buf) {
344            Ok(Event::Start(e)) => {
345                let tag_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
346                current_element = tag_name.clone();
347
348                if tag_name == "metadata" {
349                    in_metadata = true;
350                } else if tag_name == "dependencies" && in_metadata {
351                    in_dependencies = true;
352                } else if tag_name == "group" && in_dependencies {
353                    current_group_framework = e
354                        .attributes()
355                        .filter_map(|a| a.ok())
356                        .find(|attr| attr.key.as_ref() == b"targetFramework")
357                        .and_then(|attr| String::from_utf8(attr.value.to_vec()).ok());
358                } else if tag_name == "repository" && in_metadata {
359                    let repository = parse_repository_metadata(&e);
360                    vcs_url = repository.vcs_url;
361                    repository_branch = repository.branch;
362                    repository_commit = repository.commit;
363                } else if tag_name == "license" && in_metadata {
364                    license_type = e
365                        .attributes()
366                        .filter_map(|a| a.ok())
367                        .find(|attr| attr.key.as_ref() == b"type")
368                        .and_then(|attr| String::from_utf8(attr.value.to_vec()).ok());
369                }
370            }
371            Ok(Event::Empty(e)) => {
372                let tag_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
373
374                if tag_name == "dependency" && in_dependencies {
375                    if let Some(dep) =
376                        parse_nuspec_dependency(&e, current_group_framework.as_deref())
377                    {
378                        dependencies.push(dep);
379                    }
380                } else if tag_name == "repository" && in_metadata {
381                    let repository = parse_repository_metadata(&e);
382                    vcs_url = repository.vcs_url;
383                    repository_branch = repository.branch;
384                    repository_commit = repository.commit;
385                }
386            }
387            Ok(Event::Text(e)) => {
388                if !in_metadata {
389                    continue;
390                }
391
392                let text = e.decode().ok().map(|s| s.trim().to_string());
393                if let Some(text) = text.filter(|s| !s.is_empty()) {
394                    match current_element.as_str() {
395                        "id" => name = Some(text),
396                        "version" => version = Some(text),
397                        "description" => description = Some(text),
398                        "projectUrl" => homepage_url = Some(text),
399                        "authors" => {
400                            parties.push(build_nuget_party("author", text));
401                        }
402                        "owners" => {
403                            parties.push(build_nuget_party("owner", text));
404                        }
405                        "license" => {
406                            extracted_license_statement = Some(text);
407                        }
408                        "licenseUrl" if extracted_license_statement.is_none() => {
409                            extracted_license_statement = Some(text);
410                        }
411                        "licenseUrl" => {}
412                        "copyright" => copyright = Some(text),
413                        _ => {}
414                    }
415                }
416            }
417            Ok(Event::End(e)) => {
418                let tag_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
419
420                if tag_name == "metadata" {
421                    in_metadata = false;
422                } else if tag_name == "dependencies" {
423                    in_dependencies = false;
424                } else if tag_name == "group" {
425                    current_group_framework = None;
426                }
427
428                current_element.clear();
429            }
430            Ok(Event::Eof) => break,
431            Err(e) => {
432                return Err(format!("XML parsing error: {}", e));
433            }
434            _ => {}
435        }
436        buf.clear();
437    }
438
439    let (repository_homepage_url, repository_download_url, api_data_url) =
440        build_nuget_urls(name.as_deref(), version.as_deref());
441
442    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
443        if license_type.as_deref() == Some("expression") {
444            normalize_spdx_declared_license(extracted_license_statement.as_deref())
445        } else {
446            empty_declared_license_data()
447        };
448
449    let holder = None;
450
451    let mut extra_data = serde_json::Map::new();
452    insert_extra_string(&mut extra_data, "license_type", license_type.clone());
453    if license_type.as_deref() == Some("file") {
454        insert_extra_string(
455            &mut extra_data,
456            "license_file",
457            extracted_license_statement.clone(),
458        );
459    }
460    insert_extra_string(&mut extra_data, "repository_branch", repository_branch);
461    insert_extra_string(&mut extra_data, "repository_commit", repository_commit);
462
463    Ok(PackageData {
464        datasource_id: Some(DatasourceId::NugetNupkg),
465        package_type: Some(super::nupkg::NupkgParser::PACKAGE_TYPE),
466        name: name.map(truncate_field),
467        version: version.map(truncate_field),
468        description: description.map(truncate_field),
469        homepage_url: homepage_url.map(truncate_field),
470        parties,
471        dependencies,
472        declared_license_expression,
473        declared_license_expression_spdx,
474        license_detections,
475        extracted_license_statement: extracted_license_statement.map(truncate_field),
476        copyright: copyright.map(truncate_field),
477        holder,
478        vcs_url: vcs_url.map(truncate_field),
479        extra_data: if extra_data.is_empty() {
480            None
481        } else {
482            Some(extra_data.into_iter().collect())
483        },
484        repository_homepage_url,
485        repository_download_url,
486        api_data_url,
487        ..default_package_data(Some(DatasourceId::NugetNupkg))
488    })
489}