Skip to main content

provenant/parsers/
bower.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for Bower package manifests (bower.json).
5//!
6//! Extracts package metadata, dependencies, and license information from
7//! bower.json files used by the legacy Bower JavaScript package manager.
8//!
9//! # Supported Formats
10//! - bower.json (manifest)
11//! - .bower.json (alternative manifest)
12//!
13//! # Key Features
14//! - Dependency extraction (dependencies, devDependencies)
15//! - License extraction (string or array format)
16//! - Author parsing (string or object format)
17//! - VCS repository URL extraction
18//! - Private package detection
19//!
20//! # Implementation Notes
21//! - Uses serde_json for JSON parsing
22//! - Graceful error handling: logs warnings and returns default on parse failure
//! - Only an array-valued `authors` field is read; each entry may be a string or an object
24
25use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
26use crate::parser_warn as warn;
27use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
28use packageurl::PackageUrl;
29use serde_json::Value;
30use std::path::Path;
31
32use super::PackageParser;
33use super::license_normalization::{
34    DeclaredLicenseMatchMetadata, build_declared_license_data, combine_normalized_licenses,
35    empty_declared_license_data, normalize_declared_license_key, normalize_spdx_declared_license,
36};
37
// Top-level keys read from the bower.json document. Each constant holds the
// exact JSON key string that is looked up on the parsed manifest.

// Keys read directly as plain strings.
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
const FIELD_DESCRIPTION: &str = "description";
// Read as either a single string or an array of strings.
const FIELD_LICENSE: &str = "license";
// Read as arrays.
const FIELD_KEYWORDS: &str = "keywords";
const FIELD_AUTHORS: &str = "authors";
const FIELD_HOMEPAGE: &str = "homepage";
// Read as an object carrying "type" and "url".
const FIELD_REPOSITORY: &str = "repository";
// Read as objects mapping a package name to a requirement string.
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_DEV_DEPENDENCIES: &str = "devDependencies";
// Read as a boolean; only consulted when the manifest has a name.
const FIELD_PRIVATE: &str = "private";
49
50/// Bower package parser for bower.json manifests.
51///
52/// Supports legacy Bower JavaScript package manager format with all
53/// standard fields including dependencies, devDependencies, authors, and licenses.
54pub struct BowerJsonParser;
55
56impl PackageParser for BowerJsonParser {
57    const PACKAGE_TYPE: PackageType = PackageType::Bower;
58
59    fn extract_packages(path: &Path) -> Vec<PackageData> {
60        let json = match read_and_parse_json(path) {
61            Ok(json) => json,
62            Err(e) => {
63                warn!("Failed to read or parse bower.json at {:?}: {}", path, e);
64                return vec![default_package_data()];
65            }
66        };
67
68        let name = json
69            .get(FIELD_NAME)
70            .and_then(|v| v.as_str())
71            .map(|s| truncate_field(s.to_string()));
72
73        // If name is missing, the package is considered private
74        let is_private = if name.is_none() {
75            true
76        } else {
77            json.get(FIELD_PRIVATE)
78                .and_then(|v| v.as_bool())
79                .unwrap_or(false)
80        };
81
82        let version = json
83            .get(FIELD_VERSION)
84            .and_then(|v| v.as_str())
85            .map(|s| truncate_field(s.to_string()));
86
87        let description = json
88            .get(FIELD_DESCRIPTION)
89            .and_then(|v| v.as_str())
90            .map(|s| truncate_field(s.to_string()));
91
92        let extracted_license_statement = extract_license_statement(&json);
93        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
94            normalize_bower_declared_license(&json, extracted_license_statement.as_deref());
95        let declared_license_expression = declared_license_expression.map(truncate_field);
96        let declared_license_expression_spdx = declared_license_expression_spdx.map(truncate_field);
97        let keywords = extract_keywords(&json);
98        let parties = extract_parties(&json);
99        let homepage_url = json
100            .get(FIELD_HOMEPAGE)
101            .and_then(|v| v.as_str())
102            .map(|s| truncate_field(s.to_string()));
103
104        let vcs_url = extract_vcs_url(&json);
105        let dependencies = extract_dependencies(&json, FIELD_DEPENDENCIES, "dependencies", true);
106        let dev_dependencies =
107            extract_dependencies(&json, FIELD_DEV_DEPENDENCIES, "devDependencies", false);
108
109        vec![PackageData {
110            package_type: Some(Self::PACKAGE_TYPE),
111            namespace: None,
112            name,
113            version,
114            qualifiers: None,
115            subpath: None,
116            primary_language: Some("JavaScript".to_string()),
117            description,
118            release_date: None,
119            parties,
120            keywords,
121            homepage_url,
122            download_url: None,
123            size: None,
124            sha1: None,
125            md5: None,
126            sha256: None,
127            sha512: None,
128            bug_tracking_url: None,
129            code_view_url: None,
130            vcs_url,
131            copyright: None,
132            holder: None,
133            declared_license_expression,
134            declared_license_expression_spdx,
135            license_detections,
136            other_license_expression: None,
137            other_license_expression_spdx: None,
138            other_license_detections: Vec::new(),
139            extracted_license_statement,
140            notice_text: None,
141            source_packages: Vec::new(),
142            file_references: Vec::new(),
143            is_private,
144            is_virtual: false,
145            extra_data: None,
146            dependencies: [dependencies, dev_dependencies].concat(),
147            repository_homepage_url: None,
148            repository_download_url: None,
149            api_data_url: None,
150            datasource_id: Some(DatasourceId::BowerJson),
151            purl: None,
152        }]
153    }
154
155    fn is_match(path: &Path) -> bool {
156        path.file_name()
157            .is_some_and(|name| name == "bower.json" || name == ".bower.json")
158    }
159
160    fn metadata() -> Vec<super::metadata::ParserMetadata> {
161        vec![super::metadata::ParserMetadata {
162            description: "Bower package manifest",
163            file_patterns: &["**/bower.json", "**/.bower.json"],
164            package_type: "bower",
165            primary_language: "JavaScript",
166            documentation_url: Some("https://bower.io"),
167        }]
168    }
169}
170
171/// Reads and parses a JSON file
172fn read_and_parse_json(path: &Path) -> Result<Value, String> {
173    let content =
174        read_file_to_string(path, None).map_err(|e| format!("Failed to read file: {}", e))?;
175    serde_json::from_str(&content).map_err(|e| format!("Failed to parse JSON: {}", e))
176}
177
178/// Extracts license statement from the license field.
179/// Can be a string or an array of strings.
180fn extract_license_statement(json: &Value) -> Option<String> {
181    json.get(FIELD_LICENSE)
182        .and_then(|license_value| match license_value {
183            Value::String(s) => {
184                let trimmed = s.trim();
185                if trimmed.is_empty() {
186                    None
187                } else {
188                    Some(truncate_field(trimmed.to_string()))
189                }
190            }
191            Value::Array(licenses) => {
192                let license_strings: Vec<String> = licenses
193                    .iter()
194                    .take(MAX_ITERATION_COUNT)
195                    .filter_map(|v| v.as_str())
196                    .map(|s| s.trim())
197                    .filter(|s| !s.is_empty())
198                    .map(String::from)
199                    .collect();
200
201                if license_strings.is_empty() {
202                    None
203                } else {
204                    Some(truncate_field(license_strings.join(" AND ")))
205                }
206            }
207            _ => None,
208        })
209}
210
211fn normalize_bower_declared_license(
212    json: &Value,
213    extracted_license_statement: Option<&str>,
214) -> (
215    Option<String>,
216    Option<String>,
217    Vec<crate::models::LicenseDetection>,
218) {
219    match json.get(FIELD_LICENSE) {
220        Some(Value::Array(licenses)) => {
221            let normalized = licenses
222                .iter()
223                .take(MAX_ITERATION_COUNT)
224                .filter_map(|value| value.as_str().map(str::trim))
225                .filter(|value| !value.is_empty())
226                .map(normalize_declared_license_key)
227                .collect::<Option<Vec<_>>>();
228
229            if let Some(normalized) = normalized
230                && let Some(combined) = combine_normalized_licenses(normalized, " AND ")
231            {
232                return build_declared_license_data(
233                    combined,
234                    DeclaredLicenseMatchMetadata::single_line(
235                        extracted_license_statement.unwrap_or_default(),
236                    ),
237                );
238            }
239
240            empty_declared_license_data()
241        }
242        _ => normalize_spdx_declared_license(extracted_license_statement),
243    }
244}
245
246/// Extracts keywords from the keywords field.
247fn extract_keywords(json: &Value) -> Vec<String> {
248    json.get(FIELD_KEYWORDS)
249        .and_then(|v| v.as_array())
250        .map(|arr| {
251            arr.iter()
252                .take(MAX_ITERATION_COUNT)
253                .filter_map(|v| v.as_str())
254                .map(|s| truncate_field(s.to_string()))
255                .collect()
256        })
257        .unwrap_or_default()
258}
259
260/// Extracts parties (authors) from the authors field.
261/// Authors can be strings or objects with name, email, and homepage fields.
262fn extract_parties(json: &Value) -> Vec<Party> {
263    let mut parties = Vec::new();
264
265    if let Some(authors) = json.get(FIELD_AUTHORS).and_then(|v| v.as_array()) {
266        for author in authors.iter().take(MAX_ITERATION_COUNT) {
267            if let Some(party) = extract_party_from_author(author) {
268                parties.push(party);
269            }
270        }
271    }
272
273    parties
274}
275
276/// Extracts a single party from an author value (string or object).
277fn extract_party_from_author(author: &Value) -> Option<Party> {
278    match author {
279        Value::String(s) => {
280            let (name, email) = parse_author_string(s);
281            Some(Party {
282                r#type: Some("person".to_string()),
283                role: Some("author".to_string()),
284                name: name.map(truncate_field),
285                email: email.map(truncate_field),
286                url: None,
287                organization: None,
288                organization_url: None,
289                timezone: None,
290            })
291        }
292        Value::Object(obj) => {
293            let name = obj
294                .get("name")
295                .and_then(|v| v.as_str())
296                .map(|s| truncate_field(s.to_string()));
297            let email = obj
298                .get("email")
299                .and_then(|v| v.as_str())
300                .map(|s| truncate_field(s.to_string()));
301            let url = obj
302                .get("homepage")
303                .and_then(|v| v.as_str())
304                .map(|s| truncate_field(s.to_string()));
305
306            Some(Party {
307                r#type: Some("person".to_string()),
308                role: Some("author".to_string()),
309                name,
310                email,
311                url,
312                organization: None,
313                organization_url: None,
314                timezone: None,
315            })
316        }
317        _ => Some(Party {
318            r#type: Some("person".to_string()),
319            role: Some("author".to_string()),
320            name: Some(truncate_field(format!("{:?}", author))),
321            email: None,
322            url: None,
323            organization: None,
324            organization_url: None,
325            timezone: None,
326        }),
327    }
328}
329
330/// Parses author string in "Name <email>" format.
331/// Returns (name, email) tuple with both as Option<String>.
332fn parse_author_string(author_str: &str) -> (Option<String>, Option<String>) {
333    if let Some(email_start) = author_str.find('<')
334        && let Some(email_end) = author_str.find('>')
335        && email_start < email_end
336    {
337        let name = author_str[..email_start].trim();
338        let email = author_str[email_start + 1..email_end].trim();
339
340        let name = if name.is_empty() {
341            None
342        } else {
343            Some(truncate_field(name.to_string()))
344        };
345        let email = if email.is_empty() {
346            None
347        } else {
348            Some(truncate_field(email.to_string()))
349        };
350
351        return (name, email);
352    }
353
354    let trimmed = author_str.trim();
355    if trimmed.is_empty() {
356        (None, None)
357    } else {
358        (Some(truncate_field(trimmed.to_string())), None)
359    }
360}
361
362/// Extracts VCS URL from the repository field.
363/// Repository can be an object with type and url fields.
364fn extract_vcs_url(json: &Value) -> Option<String> {
365    json.get(FIELD_REPOSITORY).and_then(|repo| {
366        if let Some(repo_obj) = repo.as_object() {
367            let repo_type = repo_obj.get("type").and_then(|v| v.as_str());
368            let repo_url = repo_obj.get("url").and_then(|v| v.as_str());
369
370            match (repo_type, repo_url) {
371                (Some(t), Some(u)) if !t.is_empty() && !u.is_empty() => {
372                    Some(truncate_field(format!("{}+{}", t, u)))
373                }
374                _ => None,
375            }
376        } else {
377            None
378        }
379    })
380}
381
382/// Extracts dependencies from a dependency field.
383fn extract_dependencies(
384    json: &Value,
385    field: &str,
386    scope: &str,
387    is_runtime: bool,
388) -> Vec<Dependency> {
389    json.get(field)
390        .and_then(|deps| deps.as_object())
391        .map_or_else(Vec::new, |deps| {
392            deps.iter()
393                .take(MAX_ITERATION_COUNT)
394                .filter_map(|(name, requirement)| {
395                    let requirement_str = requirement.as_str()?;
396                    let package_url =
397                        PackageUrl::new(BowerJsonParser::PACKAGE_TYPE.as_str(), name).ok()?;
398
399                    Some(Dependency {
400                        purl: Some(truncate_field(package_url.to_string())),
401                        extracted_requirement: Some(truncate_field(requirement_str.to_string())),
402                        scope: Some(scope.to_string()),
403                        is_runtime: Some(is_runtime),
404                        is_optional: Some(!is_runtime),
405                        is_pinned: None,
406                        is_direct: Some(true),
407                        resolved_package: None,
408                        extra_data: None,
409                    })
410                })
411                .collect()
412        })
413}
414
415fn default_package_data() -> PackageData {
416    PackageData {
417        package_type: Some(BowerJsonParser::PACKAGE_TYPE),
418        primary_language: Some("JavaScript".to_string()),
419        datasource_id: Some(DatasourceId::BowerJson),
420        ..Default::default()
421    }
422}